PD_midbrain_snRNAseq_GSE157783
PAPER
“Single-cell sequencing of the human midbrain reveals glial activation and a neuronal state specific to Parkinson’s disease”
5 iPD and 6 Healthy control midbrain snRNAseq
#---------- Seurat
https://satijalab.org/seurat/vignettes.html
https://ucdavis-bioinformatics-training.github.io/2017_2018-single-cell-RNA-sequencing-Workshop-UCD_UCB_UCSF/day2/scRNA_Workshop-PART1.html
https://hbctraining.github.io/scRNA-seq/lessons/04_SC_quality_control.html
https://scrnaseq-course.cog.sanger.ac.uk/website/biological-analysis.html#pseudotime-analysis
https://liulab-dfci.github.io/bioinfo-combio/scrna1.html#intro-to-scrna-seq
http://bioconductor.org/books/release/OSCA/integrating-datasets.html#performing-mnn-correction
#---------- Scanpy – Single-Cell Analysis in Python
https://scanpy.readthedocs.io/en/stable/
#---------- Brain single nuclei Papers
"A single-cell atlas of the human substantia nigra reveals cell-specific pathways associated with neurological disorders" contains celltype markers
SYNAPSE - create project to download sequencing data
https://www.synapse.org/#!Synapse:syn5550382
https://www.nature.com/articles/s41586-019-1195-2
"Single-cell transcriptomic analysis of Alzheimer’s disease" contains celltype markers
https://portal.brain-map.org/atlases-and-data/rnaseq
https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x
https://www.biorxiv.org/content/10.1101/2020.03.31.016972v2
"Evolution of cellular diversity in primary motor cortex of human, marmoset monkey, and mouse"
"Integrative single-cell analysis of transcriptional and epigenetic states in the human adult brain"contains celltype markers
#---------- Multiplexing
https://www.nature.com/articles/s41467-019-10756-2
"Nuclei multiplexing with barcoded antibodies for single-nucleus genomics"
ASAP-SEQ
https://cite-seq.com/asapseq/
https://www.biorxiv.org/content/10.1101/2020.09.08.286914v1
#---------- Multiomic single cell papers
https://www.nature.com/articles/s41467-018-03149-4
"scNMT-seq enables joint profiling of chromatin accessibility DNA methylation and transcription in single cells"
#---------- Sample Deconvolution
DemuxEM - a computational tool that detects inter-sample multiplets and assigns singlets to their sample of origin
https://kco-cloud.readthedocs.io/en/latest/hashing_cite_seq.html
cellsnp-lite - assigns cells to donors and detects doublets, even without genotyping reference
https://github.com/single-cell-genetics/cellsnp-lite
scSplit - Genotype-free demultiplexing of pooled single-cell RNA-seq, using a hidden state model for identifying genetically distinct samples within a mixed population
https://github.com/jon-xu/scSplit
demuxlet - Genetic multiplexing of barcoded single cell RNA-seq
https://github.com/statgen/demuxlet
#---------- Doublets
DoubletFinder - an R package that predicts doublets in single-cell RNA sequencing data
#object_filtered <- subset(x = object, idents = "doublet", invert = TRUE)
#object_filtered <- subset(x = object, subset = "CD3E" > EXP_VALUE, invert = TRUE)
setwd("/Volumes/projects_secondary/bras/Lee/ASAP/PD_midbrain_snRNAseq_GSE157783")
knitr::opts_chunk$set(warning=FALSE, message=FALSE, cache=FALSE, error=FALSE, cache.lazy=FALSE)
#knitr::opts_knit$set(root.dir = '/Volumes/projects_secondary/bras/Lee/PPMI/DLB')
#options(scipen=999)
#options(stringsAsFactors = FALSE)
# Chunk Options
# include = FALSE, runs code, prevents code and results from appearing
# echo = FALSE, runs code, prevents code from appearing but not the results
# eval = FALSE, does not run code
# message = FALSE prevents messages that are generated by code from appearing
# warning = FALSE prevents warnings that are generated by code from appearing
# fig.cap = "..." adds a caption to graphical results.
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("fgsea")
#install.packages('metap')
library(corrplot)
library(cowplot)
library(data.table)
library(DoubletFinder)
library(dplyr)
library(fgsea)
library(ggplot2)
library(ggpubr)
library(gprofiler2)
library(knitr)
library(kableExtra)
library(lubridate)
library(magrittr)
library(metap)
library(multtest)
library(parallel)
library(purrr)
library(RColorBrewer)
library(readr)
library(reshape2)
library(reticulate)
library(rmarkdown)
library(Seurat)
library(stringr)
library(tibble)
library(tidyr)
library(tidyverse)
library(viridis)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] viridis_0.5.1 viridisLite_0.3.0 forcats_0.5.0
## [4] tidyverse_1.3.0 tidyr_1.1.2 tibble_3.0.5
## [7] stringr_1.4.0 Seurat_3.2.3 rmarkdown_2.6
## [10] reticulate_1.18 reshape2_1.4.4 readr_1.4.0
## [13] RColorBrewer_1.1-2 purrr_0.3.4 multtest_2.46.0
## [16] Biobase_2.50.0 BiocGenerics_0.36.0 metap_1.4
## [19] magrittr_2.0.1 lubridate_1.7.9.2 kableExtra_1.3.1
## [22] knitr_1.31 gprofiler2_0.2.0 ggpubr_0.4.0
## [25] ggplot2_3.3.3 fgsea_1.16.0 dplyr_1.0.3
## [28] DoubletFinder_2.0.3 data.table_1.13.6 cowplot_1.1.1
## [31] corrplot_0.84
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 fastmatch_1.1-0
## [4] sn_1.6-2 plyr_1.8.6 igraph_1.2.6
## [7] lazyeval_0.2.2 splines_4.0.3 BiocParallel_1.24.1
## [10] listenv_0.8.0 scattermore_0.7 TH.data_1.0-10
## [13] digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2
## [16] tensor_1.5 cluster_2.1.0 ROCR_1.0-11
## [19] openxlsx_4.2.3 globals_0.14.0 modelr_0.1.8
## [22] matrixStats_0.57.0 sandwich_3.0-0 colorspace_2.0-0
## [25] rvest_0.3.6 ggrepel_0.9.1 haven_2.3.1
## [28] rbibutils_2.0 xfun_0.20 crayon_1.3.4
## [31] jsonlite_1.7.2 spatstat.data_1.7-0 spatstat_1.64-1
## [34] survival_3.2-7 zoo_1.8-8 glue_1.4.2
## [37] polyclip_1.10-0 gtable_0.3.0 webshot_0.5.2
## [40] leiden_0.3.7 car_3.0-10 future.apply_1.7.0
## [43] abind_1.4-5 scales_1.1.1 mvtnorm_1.1-1
## [46] DBI_1.1.1 rstatix_0.7.0 miniUI_0.1.1.1
## [49] Rcpp_1.0.6 plotrix_3.8-1 xtable_1.8-4
## [52] tmvnsim_1.0-2 rsvd_1.0.3 foreign_0.8-81
## [55] stats4_4.0.3 htmlwidgets_1.5.3 httr_1.4.2
## [58] TFisher_0.2.0 ellipsis_0.3.1 ica_1.0-2
## [61] pkgconfig_2.0.3 dbplyr_2.0.0 uwot_0.1.10
## [64] deldir_0.2-9 tidyselect_1.1.0 rlang_0.4.10
## [67] later_1.1.0.1 munsell_0.5.0 cellranger_1.1.0
## [70] tools_4.0.3 cli_2.2.0 generics_0.1.0
## [73] broom_0.7.5 mathjaxr_1.0-1 ggridges_0.5.3
## [76] evaluate_0.14 fastmap_1.1.0 goftest_1.2-2
## [79] yaml_2.2.1 fs_1.5.0 fitdistrplus_1.1-3
## [82] zip_2.1.1 RANN_2.6.1 nlme_3.1-151
## [85] pbapply_1.4-3 future_1.21.0 mime_0.9
## [88] xml2_1.3.2 compiler_4.0.3 rstudioapi_0.13
## [91] plotly_4.9.3 curl_4.3 png_0.1-7
## [94] ggsignif_0.6.1 spatstat.utils_2.0-0 reprex_1.0.0
## [97] stringi_1.5.3 lattice_0.20-41 Matrix_1.3-2
## [100] vctrs_0.3.6 mutoss_0.1-12 pillar_1.4.7
## [103] lifecycle_0.2.0 Rdpack_2.1 lmtest_0.9-38
## [106] RcppAnnoy_0.0.18 irlba_2.3.3 gbRd_0.4-11
## [109] httpuv_1.5.5 patchwork_1.1.1 R6_2.5.0
## [112] promises_1.1.1 KernSmooth_2.23-18 gridExtra_2.3
## [115] rio_0.5.26 parallelly_1.23.0 codetools_0.2-18
## [118] MASS_7.3-53 assertthat_0.2.1 withr_2.4.1
## [121] sctransform_0.3.2 mnormt_2.0.2 multcomp_1.4-15
## [124] mgcv_1.8-33 hms_1.0.0 rpart_4.1-15
## [127] grid_4.0.3 carData_3.0-4 Rtsne_0.15
## [130] numDeriv_2016.8-1.1 shiny_1.6.0
cellranger-4.0.0
genome annotation indexed with gtf containing unspliced genes
#--------- Download SRA data from NCBI
https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA662780&o=acc_s%3Aa
module load sratoolkit/2.10.8
qsubs -d -a srr_names -n sratoolkit_prefetch -s "prefetch -X 20G SAMPLE"
qsubs -d -a srr_names -n sartoolkit_fastqdump -s "fastq-dump --split-files /home/lee.marshall/bras_primary/software/sratoolkit/ncbi/sra/sra/SAMPLE.sra --outdir fastq"
#--------- Rename fastq samples
while read line ; do mv $(awk '{print $1"_1.fastq",$2"_I1_001.fastq"}' <<<$line) ; done < ../srr_samples
while read line ; do mv $(awk '{print $1"_2.fastq",$2"_R1_001.fastq"}' <<<$line) ; done < ../srr_samples
while read line ; do mv $(awk '{print $1"_3.fastq",$2"_R2_001.fastq"}' <<<$line) ; done < ../srr_samples
#--------- gzip all fastq files
qsubs -a sample_names -n gzip_fastq -s "gzip fastq/SAMPLE"
#---------- premRNA GTF
awk 'BEGIN{FS="\t"; OFS="\t"} $3 == "transcript"{ print; $3="exon"; $9 = gensub("(transcript_id\\s\"{0,1})([^;\"]+)(\"{0,1});", "\\1\\2_premrna\\3;", "g", $9); print; next}{print}' \
refdata-gex-GRCh38-2020-A/genes/genes.gtf > refdata-gex-GRCh38-2020-A.premrna.gtf
#---------- premRNA Cellranger Reference
module load cellranger/cellranger-4.0.0
qsubs -n cellranger_mkref -s "cellranger mkref --genome=refdata-gex-GRCh38-2020-A_premrna --fasta=refdata-gex-GRCh38-2020-A/fasta/genome.fa --genes=refdata-gex-GRCh38-2020-A.premrna.gtf"
#---------- premRNA Cellranger Count
module load cellranger/cellranger-4.0.0
qsubs -n cellranger_count -c 5 -w 48 -a sample_names -s "cellranger count --sample=SAMPLE --id=SAMPLE_count --fastqs=./fastq --chemistry=SC3Pv3 --transcriptome=/home/lee.marshall/bras_primary/software/cellranger/refdata-gex-GRCh38-2020-A_premrna"
Quality control graphs used to identify filtering thresholds
#---------- Seurat_Object
# Get files
dir <- "counts/"
samples <- paste0(c("IPD1", "IPD2", "IPD3", "IPD4", "IPD5",
"C1", "C2", "C3", "C4", "C5", "C6"), "_count")
# Count Matrix
seurat_counts <- lapply(samples, function(i){
count <- Read10X(data.dir=paste0(dir,i,"/outs/filtered_feature_bc_matrix/"),
gene.column=2, # col1 Ensemble, col2 Symbol
unique.features=TRUE,
strip.suffix=TRUE)
seurat_obj <- CreateSeuratObject(counts=count,
project=i,
min.cells=10,
min.genes=200)
})
# Merge Seurat Objects
seurat_combined <- merge(x = seurat_counts[[1]],
y = seurat_counts[2:length(seurat_counts)],
add.cell.ids = unlist(samples),
project="Aging_Mice_GSE129788")
# Alternative Seurat Object
#d10x.data <- sapply(samples, function(i){
# d10x <- Read10X(file.path(dir,i,"/outs/filtered_feature_bc_matrix/"))
# colnames(d10x) <- paste(sapply(strsplit(colnames(d10x),split="-"),'[[',1L),i,sep="-")
# d10x
#})
#d10x.cbind <- do.call("cbind", d10x.data)
#d10x.combined <- CreateSeuratObject(
# counts=d10x.cbind,
# project="DLB_snRNAseq",
# min.cells = 10,
# min.genes = 200,
# names.field = 2,
# names.delim = "\\-")
#---------- Add Metadata
#View(seurat_combined@meta.data)
# orig.ident: contains the sample identity if known
# nCount_RNA: number of UMIs per cell
# nFeature_RNA: number of genes detected per cell
# Add number of genes per UMI for each cell to metadata
seurat_combined$log10GenesPerUMI <- log10(seurat_combined$nFeature_RNA) / log10(seurat_combined$nCount_RNA)
# Compute percent mito ratio
seurat_combined$mitoRatio <- PercentageFeatureSet(object = seurat_combined, pattern = "^MT-")
seurat_combined$mitoRatio <- seurat_combined@meta.data$mitoRatio / 100
# Create metadata dataframe
metadata <- seurat_combined@meta.data
# Add cell IDs to metadata
metadata$cells <- rownames(metadata)
# Rename columns
metadata <- metadata %>%
dplyr::rename(seq_folder = orig.ident,
nUMI = nCount_RNA,
nGene = nFeature_RNA)
# Create sample column
metadata$sample <- NA
metadata$sample[which(str_detect(metadata$cells,"^IPD1"))] <- "IPD1"
metadata$sample[which(str_detect(metadata$cells,"^IPD2"))] <- "IPD2"
metadata$sample[which(str_detect(metadata$cells,"^IPD3"))] <- "IPD3"
metadata$sample[which(str_detect(metadata$cells,"^IPD4"))] <- "IPD4"
metadata$sample[which(str_detect(metadata$cells,"^IPD5"))] <- "IPD5"
metadata$sample[which(str_detect(metadata$cells,"^C1"))] <- "HC1"
metadata$sample[which(str_detect(metadata$cells,"^C2"))] <- "HC2"
metadata$sample[which(str_detect(metadata$cells,"^C3"))] <- "HC3"
metadata$sample[which(str_detect(metadata$cells,"^C4"))] <- "HC4"
metadata$sample[which(str_detect(metadata$cells,"^C5"))] <- "HC5"
metadata$sample[which(str_detect(metadata$cells,"^C6"))] <- "HC6"
# Create group column
metadata$group <- NA
metadata$group[which(str_detect(metadata$cells,"^IPD1"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD2"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD3"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD4"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD5"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^C1"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C2"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C3"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C4"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C5"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C6"))] <- "HC"
# Add metadata back to Seurat object
seurat_combined@meta.data <- metadata
# rm data
rm(seurat_counts, metadata)
# Create .RData object to load at any time
save(seurat_combined, file="seurat_combined.RData")
#---------- Cell_Counts
# Load data
load("seurat_combined.RData")
# Visualize the number of cell counts per sample
seurat_combined@meta.data %>%
ggplot(aes(x=sample, fill=sample)) +
geom_bar() +
ggtitle("Number of Cells") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold"))
#---------- UMI_Counts_per_cell (transcripts)
# Visualize the number UMIs/transcripts per cell
seurat_combined@meta.data %>%
ggplot(aes(color=sample, x=nUMI, fill= sample)) +
geom_density(alpha = 0.2) +
geom_vline(xintercept = 1000, linetype="dashed", colour = "red3") +
geom_vline(xintercept = 20000, linetype="dashed", colour = "red3") +
ggtitle("Number of UMIs/Transcripts per Cell") +
ylab("Cell density") +
scale_x_log10() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold"))
#---------- Genes_per_cell
# Visualize the distribution of genes detected per cell via histogram
p1 <- seurat_combined@meta.data %>%
ggplot(aes(color=sample, x=nGene, fill= sample)) +
geom_density(alpha = 0.2) +
geom_vline(xintercept = 500, linetype="dashed", colour = "red3") +
geom_vline(xintercept = 10000, linetype="dashed", colour = "red3") +
ggtitle("Number of Genes per Cell") +
scale_x_log10() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold"))
# Visualize the distribution of genes detected per cell via boxplot
p2 <- seurat_combined@meta.data %>%
ggplot(aes(x=sample, y=log10(nGene), fill=sample)) +
geom_boxplot() +
ggtitle("Number of Genes vs Number of Cells") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold"))
# Plots
cowplot::plot_grid(p1, p2, ncol=2)
#---------- UMI_Counts_vs_Genes
# Visualize the correlation between genes detected and number of UMIs/transcripts and determine whether strong presence of cells with low numbers of genes/UMIs
seurat_combined@meta.data %>%
ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) +
geom_point() +
geom_vline(xintercept = 1000, linetype="dashed", colour = "red3") +
geom_vline(xintercept = 20000, linetype="dashed", colour = "red3") +
geom_hline(yintercept = 500, linetype="dashed", colour = "red3") +
geom_hline(yintercept = 10000, linetype="dashed", colour = "red3") +
ggtitle("Number of UMIs/Transcripts vs Number of Cells") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
scale_colour_viridis_c() +
facet_wrap(~sample)
#---------- Mitochondrial_counts_ratio
# Visualize the distribution of mitochondrial gene expression detected per cell
seurat_combined@meta.data %>%
ggplot(aes(color=sample, x=mitoRatio, fill=sample)) +
geom_density(alpha = 0.2) +
geom_vline(xintercept = 0.20, linetype="dashed", colour = "red3") +
ggtitle("Mitochondrial Counts Ratio") +
ylab("Cell density") +
scale_x_log10() +
theme_classic()
#---------- Complexity
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
seurat_combined@meta.data %>%
ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
geom_density(alpha = 0.2) +
geom_vline(xintercept = 0.85, linetype="dashed", colour = "red3") +
ggtitle("Complexity Genes per UMI") +
ylab("Cell density") +
theme_classic()
Remove poor quilty cells and doublets
Keep high quality cells without removing biologically relevant cells
* nUMI >= 1000
* nUMI <= 20000
* nGene >= 500
* nGene <= 10000
* mitoRatio < 0.20
* log10GenesPerUMI > 0.85
#---------- Seurat_Filtering
# Filter out low quality reads using selected thresholds - these will change with experiment
seurat_filtered <- subset(x = seurat_combined,
subset= (nUMI >= 1000) &
(nUMI <= 20000) &
(nGene >= 500) &
(nGene <= 10000) &
(mitoRatio < 0.20) &
(log10GenesPerUMI > 0.85))
# Create .RData object to load at any time
save(seurat_filtered, file="seurat_filtered.RData")
#---------- Cell_Counts_Pre_and_Post_Filtered
# Load data
load("seurat_filtered.RData")
# Number of Cells
nCells_pre <- seurat_combined@meta.data$sample %>%
table() %>%
as.data.frame() %>%
dplyr::rename(Sample=".")
nCells_post <- seurat_filtered@meta.data$sample %>%
table() %>%
as.data.frame() %>%
dplyr::rename(Sample=".")
cbind(nCells_pre, nCells_post) %>% kable(caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered")) %>% add_header_above(c("Pre-Filtered" = 2, "Post-Filtered" = 2))
| Sample | Freq | Sample | Freq |
|---|---|---|---|
| HC1 | 5,561 | HC1 | 4,386 |
| HC2 | 5,520 | HC2 | 4,644 |
| HC3 | 2,628 | HC3 | 2,418 |
| HC4 | 5,266 | HC4 | 4,926 |
| HC5 | 3,101 | HC5 | 2,799 |
| HC6 | 7,186 | HC6 | 6,071 |
| IPD1 | 2,964 | IPD1 | 2,520 |
| IPD2 | 7,180 | IPD2 | 6,318 |
| IPD3 | 4,296 | IPD3 | 3,906 |
| IPD4 | 2,797 | IPD4 | 2,454 |
| IPD5 | 6,621 | IPD5 | 5,694 |
# Violin_plots
p1 <- VlnPlot(object = seurat_combined, features = c("nGene", "nUMI", "mitoRatio", "log10GenesPerUMI"), ncol = 4, pt.size=0, group.by="sample")
p2 <- VlnPlot(object = seurat_filtered, features = c("nGene", "nUMI", "mitoRatio", "log10GenesPerUMI"), ncol = 4, pt.size=0, group.by="sample")
cowplot::plot_grid(p1, p2, labels=c("Pre-Filtered", "Post-Filtered"), nrow=2)
# Scatter_plots
p1 <- FeatureScatter(object = seurat_combined, feature1 = "nGene", feature2 = "nUMI", pt.size=0.01, group.by="sample")
p2 <- FeatureScatter(object = seurat_combined, feature1 = "nGene", feature2 = "mitoRatio", pt.size=0.01, group.by="sample")
p3 <- FeatureScatter(object = seurat_filtered, feature1 = "nGene", feature2 = "nUMI", pt.size=0.01, group.by="sample")
p4 <- FeatureScatter(object = seurat_filtered, feature1 = "nGene", feature2 = "mitoRatio", pt.size=0.01, group.by="sample")
# Plots
cowplot::plot_grid(p1, p3, p2, p4, labels=c("Pre-Filtered", "Post-Filtered"), nrow=2, ncol=2)
# Remove data
rm(seurat_combined)
Determine if cell cycle regression is required
#---------- Cell_Cycle_Scoring
# Normalize the counts
seurat_phase <- NormalizeData(seurat_filtered)
# Load cell cycle markers
load(file="seurat_cycle.rda")
# Score cells for cell cycle
seurat_phase <- CellCycleScoring(seurat_phase,
g2m.features = g2m_genes,
s.features = s_genes)
# Identify the most variable genes
seurat_phase <- FindVariableFeatures(seurat_phase,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
# Scale the counts
seurat_phase <- ScaleData(seurat_phase)
# Perform PCA
seurat_phase <- RunPCA(seurat_phase)
# Create .RData object to load at any time
save(seurat_phase, file="seurat_phase.RData")
# Load data
load("seurat_phase.RData")
# Plot the PCA colored by cell cycle phase
DimPlot(seurat_phase,
reduction = "pca",
group.by= "Phase",
split.by = "Phase")
# you can regress out the s and g2m score
# alternatively you could only regress out the s-g2m score to keep non-cuyling and cycling cell differences
DimPlot(seurat_phase,
reduction = "pca",
group.by= "Phase",
split.by = "sample")
# Remove data
rm(seurat_phase)
Standard batch correction is not considered standard anymore
SCTransform batch correction allows for better normalization
SCTransform method models the UMI counts using a regularized negative binomial
model to remove the variation due to sequencing depth (total nUMIs per cell),
while adjusting the variance based on pooling information across genes with
similar abundances (similar to some bulk RNA-seq methods).
#---------- Batch_Correction_SCTransform
# The data is preprocessed by splitting into individual ‘assays’.
# In this experiment, each individual sample is considered an ‘assay’.
# Ensure the dataset used here is the non-normalized dataset
DefaultAssay(seurat_filtered) <- "RNA"
# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
seurat_split <- SplitObject(seurat_filtered, split.by = "sample")
load(file="seurat_cycle.rda")
for (i in 1:length(seurat_split)) {
seurat_split[[i]] <- NormalizeData(seurat_split[[i]],
verbose = TRUE)
seurat_split[[i]] <- CellCycleScoring(seurat_split[[i]],
g2m.features=g2m_genes,
s.features=s_genes)
seurat_split[[i]] <- SCTransform(seurat_split[[i]],
vars.to.regress = c("mitoRatio"))
}
# Select the most variable features to use for integration
seurat_features <- SelectIntegrationFeatures(object.list = seurat_split,
nfeatures = 3000)
# Prepare the SCT list object for integration
seurat_split <- PrepSCTIntegration(object.list = seurat_split,
anchor.features = seurat_features)
# Find best buddies - can take a while to run
seurat_anchors <- FindIntegrationAnchors(object.list = seurat_split,
normalization.method = "SCT",
anchor.features = seurat_features)
# Integrate across conditions
seurat_integrated <- IntegrateData(anchorset = seurat_anchors,
normalization.method = "SCT")
# Run PCA
seurat_integrated <- RunPCA(object = seurat_integrated)
ElbowPlot(object = seurat_integrated,
ndims = 50)
# Run UMAP
seurat_integrated <- RunUMAP(seurat_integrated,
dims = 1:40,
reduction = "pca")
# Create .RData object to load at any time
save(seurat_integrated, file="seurat_integrated.RData")
# Remove data
rm(seurat_filtered, seurat_anchors, seurat_split)
# Load data
load(file="seurat_integrated.RData")
# Plot PCA
PCAPlot(seurat_integrated,
group.by = "sample",
split.by = "group")
PCAPlot(seurat_integrated,
group.by = "sample")
# Explore heatmap of PCs
DimHeatmap(seurat_integrated,
dims = 1:9,
cells = 500,
balanced = TRUE)
# Printing out the most variable genes driving PCs
print(x = seurat_integrated[["pca"]],
dims = 1:9,
nfeatures = 5)
## PC_ 1
## Positive: PLXDC2, DOCK8, ARHGAP15, APBB1IP, LNCAROD
## Negative: FAM155A, FGF14, CSMD1, LRRTM4, OPCML
## PC_ 2
## Positive: ST18, PLP1, CTNNA3, PEX5L, RNF220
## Negative: FRMD4A, PLXDC2, ARHGAP24, LRMDA, SLC8A1
## PC_ 3
## Positive: HPSE2, LINC01088, TRPM3, GPC5, SPARCL1
## Negative: SLC8A1, CNTNAP2, LRRTM4, FRMD4A, CSMD1
## PC_ 4
## Positive: EPAS1, FLT1, COBLL1, CLDN5, ABCB1
## Negative: HPSE2, TRPM3, LINC01088, NRG3, AC012405.1
## PC_ 5
## Positive: ROBO2, SYT1, NRG1, LINGO2, TENM2
## Negative: VCAN, TNR, MEGF11, PTPRZ1, PDZD2
## PC_ 6
## Positive: ATP10A, ELOVL7, BTNL9, ABCB1, ANO2
## Negative: SLC6A12, DCN, NOTCH3, PTH1R, LAMA2
## PC_ 7
## Positive: CFAP54, CFAP299, C8orf34, DNAH11, SPAG17
## Negative: TRPM3, GPC5, OBI1-AS1, LSAMP, SLC1A2
## PC_ 8
## Positive: PLCB1, LINC02055, RORB, OTX2-AS1, SYNPR
## Negative: DPP10, TENM2, DCLK1, RBFOX1, KCNB2
## PC_ 9
## Positive: GPC5, SLC1A2, CACNB2, CABLES1, NRXN1
## Negative: DPP10, CPAMD8, AC012405.1, CD44, DCLK1
# Plot the elbow plot
ElbowPlot(object = seurat_integrated,
ndims = 50)
# Plot UMAP
DimPlot(seurat_integrated, reduction = "umap", group.by = "sample")
Seurat uses a graph-based clustering approach, which embeds cells in a graph
structure, using a K-nearest neighbor (KNN) graph (by default), with edges
drawn between cells with similar gene expression patterns.
The resolution is an important argument that sets the “granularity” of the
downstream clustering and will need to be optimized for every individual
experiment. For datasets of 3,000 - 5,000 cells, the resolution set between
0.4-1.4 generally yields good clustering. Increased resolution values lead to a
greater number of clusters, which is often required for larger datasets.
#---------- Clustering_Cells
# Determine the K-nearest neighbor graph
seurat_cluster <- FindNeighbors(object = seurat_integrated,
dims = 1:40)
# Determine the clusters for various resolutions
seurat_cluster <- FindClusters(object = seurat_cluster,
resolution = c(0.2, 0.4, 0.6, 0.8, 1.0, 1.2))
# Explore resolutions
# seurat_cluster@meta.data %>% View()
# Remove data
rm(seurat_integrated)
# Create .RData object to load at any time
save(seurat_cluster, file="seurat_cluster.RData")
# Load data
load("seurat_cluster.RData")
#---------- Explore resolutions
# Assign identity of clusters
Idents(object = seurat_cluster) <- "integrated_snn_res.1.2"
# Plot UMAP
DimPlot(seurat_cluster, reduction = "umap", label = TRUE)
UMAP_0.4
UMAP resolution used 1.2
Determine whether clusters represent true cell types or cluster due to
biological or technical variation, such as clusters of cells in the S phase of
the cell cycle, clusters of specific batches, or cells with high mitochondrial
content.
# Extract identity and sample information from seurat object to determine the number of cells per cluster per sample
nCells <- FetchData(seurat_cluster,
vars = c("ident", "sample")) %>%
dplyr::count(ident, sample) %>%
tidyr::spread(ident, n)
# View table
#View(n_cells)
kable(nCells, caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered"))
| sample | 0 | 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 3 | 30 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| HC1 | 449 | 515 | 132 | 87 | 104 | 192 | 158 | 155 | 105 | 125 | 95 | 66 | 388 | 37 | 11 | 53 | 49 | 31 | 49 | 8 | 5 | 9 | 21 | 391 | 3 | 151 | 122 | 187 | 191 | 205 | 292 |
| HC2 | 540 | 395 | 139 | 102 | 95 | 198 | 117 | 144 | 106 | 148 | 60 | 80 | 367 | 18 | 145 | 41 | 57 | 7 | 56 | 16 | 3 | 10 | 1 | 297 | 8 | 136 | 149 | 242 | 298 | 216 | 453 |
| HC3 | 370 | 352 | 150 | 50 | 114 | 67 | 65 | 86 | 51 | 103 | 13 | 21 | 200 | 3 | 5 | 10 | 14 | 7 | 10 | 20 | 2 | 4 | 4 | 138 | 6 | 104 | 128 | 42 | 126 | 86 | 67 |
| HC4 | 895 | 746 | 208 | 148 | 153 | 69 | 128 | 175 | 145 | 89 | 57 | 34 | 405 | 47 | 33 | 34 | 19 | 64 | 10 | 32 | NA | 10 | 2 | 372 | 4 | 215 | 211 | 82 | 176 | 288 | 75 |
| HC5 | 314 | 291 | 110 | 77 | 93 | 144 | 82 | 115 | 86 | 64 | 26 | 24 | 261 | 28 | 8 | 22 | 15 | 14 | 20 | 14 | 4 | 5 | 7 | 265 | 1 | 121 | 106 | 60 | 129 | 154 | 139 |
| HC6 | 747 | 647 | 170 | 83 | 218 | 252 | 177 | 198 | 113 | 127 | 103 | 211 | 473 | 30 | 15 | 88 | 112 | 13 | 53 | 19 | NA | 32 | 5 | 511 | 10 | 188 | 186 | 594 | 223 | 275 | 198 |
| IPD1 | 237 | 217 | 176 | 110 | 147 | 99 | 63 | 58 | 50 | 54 | 16 | 60 | 167 | 29 | 38 | 21 | 22 | 29 | 9 | 8 | 15 | 7 | 7 | 163 | 10 | 215 | 185 | 82 | 68 | 79 | 79 |
| IPD2 | 484 | 374 | 263 | 237 | 151 | 127 | 231 | 93 | 219 | 99 | 535 | 143 | 392 | 294 | 114 | 75 | 47 | 109 | 27 | 28 | 27 | 11 | 18 | 470 | 7 | 291 | 278 | 293 | 328 | 276 | 277 |
| IPD3 | 278 | 259 | 200 | 239 | 133 | 115 | 126 | 89 | 100 | 93 | 64 | 66 | 282 | 113 | 76 | 11 | 24 | 9 | 12 | 10 | 60 | 11 | 5 | 253 | 4 | 412 | 266 | 143 | 178 | 158 | 117 |
| IPD4 | 233 | 202 | 76 | 62 | 79 | 49 | 122 | 76 | 108 | 66 | 101 | 29 | 198 | 23 | 10 | 43 | 20 | 3 | 4 | 13 | NA | 11 | 1 | 263 | 3 | 99 | 187 | 64 | 93 | 147 | 69 |
| IPD5 | 494 | 452 | 277 | 539 | 193 | 141 | 125 | 126 | 92 | 164 | 22 | 118 | 317 | 44 | 85 | 94 | 64 | 17 | 26 | 20 | 48 | 32 | 19 | 305 | 27 | 635 | 315 | 306 | 256 | 160 | 181 |
# UMAP of cells in each cluster by sample
DimPlot(seurat_cluster,
label = TRUE,
split.by = "sample") + NoLegend()
# plot
FeaturePlot(seurat_cluster,
reduction = "umap",
features = c("nUMI", "nGene", "log10GenesPerUMI", "mitoRatio"),
pt.size = 0.4,
sort.cell = TRUE,
min.cutoff = 'q10',
label = TRUE)
# cell phases
nCells_phase <-FetchData(seurat_cluster,
vars = c("ident", "Phase")) %>%
dplyr::count(ident, Phase) %>%
tidyr::spread(ident, n)
# View table
#View(n_cells)
kable(nCells, caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered"))
| sample | 0 | 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 2 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 3 | 30 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| HC1 | 449 | 515 | 132 | 87 | 104 | 192 | 158 | 155 | 105 | 125 | 95 | 66 | 388 | 37 | 11 | 53 | 49 | 31 | 49 | 8 | 5 | 9 | 21 | 391 | 3 | 151 | 122 | 187 | 191 | 205 | 292 |
| HC2 | 540 | 395 | 139 | 102 | 95 | 198 | 117 | 144 | 106 | 148 | 60 | 80 | 367 | 18 | 145 | 41 | 57 | 7 | 56 | 16 | 3 | 10 | 1 | 297 | 8 | 136 | 149 | 242 | 298 | 216 | 453 |
| HC3 | 370 | 352 | 150 | 50 | 114 | 67 | 65 | 86 | 51 | 103 | 13 | 21 | 200 | 3 | 5 | 10 | 14 | 7 | 10 | 20 | 2 | 4 | 4 | 138 | 6 | 104 | 128 | 42 | 126 | 86 | 67 |
| HC4 | 895 | 746 | 208 | 148 | 153 | 69 | 128 | 175 | 145 | 89 | 57 | 34 | 405 | 47 | 33 | 34 | 19 | 64 | 10 | 32 | NA | 10 | 2 | 372 | 4 | 215 | 211 | 82 | 176 | 288 | 75 |
| HC5 | 314 | 291 | 110 | 77 | 93 | 144 | 82 | 115 | 86 | 64 | 26 | 24 | 261 | 28 | 8 | 22 | 15 | 14 | 20 | 14 | 4 | 5 | 7 | 265 | 1 | 121 | 106 | 60 | 129 | 154 | 139 |
| HC6 | 747 | 647 | 170 | 83 | 218 | 252 | 177 | 198 | 113 | 127 | 103 | 211 | 473 | 30 | 15 | 88 | 112 | 13 | 53 | 19 | NA | 32 | 5 | 511 | 10 | 188 | 186 | 594 | 223 | 275 | 198 |
| IPD1 | 237 | 217 | 176 | 110 | 147 | 99 | 63 | 58 | 50 | 54 | 16 | 60 | 167 | 29 | 38 | 21 | 22 | 29 | 9 | 8 | 15 | 7 | 7 | 163 | 10 | 215 | 185 | 82 | 68 | 79 | 79 |
| IPD2 | 484 | 374 | 263 | 237 | 151 | 127 | 231 | 93 | 219 | 99 | 535 | 143 | 392 | 294 | 114 | 75 | 47 | 109 | 27 | 28 | 27 | 11 | 18 | 470 | 7 | 291 | 278 | 293 | 328 | 276 | 277 |
| IPD3 | 278 | 259 | 200 | 239 | 133 | 115 | 126 | 89 | 100 | 93 | 64 | 66 | 282 | 113 | 76 | 11 | 24 | 9 | 12 | 10 | 60 | 11 | 5 | 253 | 4 | 412 | 266 | 143 | 178 | 158 | 117 |
| IPD4 | 233 | 202 | 76 | 62 | 79 | 49 | 122 | 76 | 108 | 66 | 101 | 29 | 198 | 23 | 10 | 43 | 20 | 3 | 4 | 13 | NA | 11 | 1 | 263 | 3 | 99 | 187 | 64 | 93 | 147 | 69 |
| IPD5 | 494 | 452 | 277 | 539 | 193 | 141 | 125 | 126 | 92 | 164 | 22 | 118 | 317 | 44 | 85 | 94 | 64 | 17 | 26 | 20 | 48 | 32 | 19 | 305 | 27 | 635 | 315 | 306 | 256 | 160 | 181 |
# Explore whether clusters segregate by cell cycle phase
DimPlot(seurat_cluster,
label = TRUE,
split.by = "Phase") + NoLegend()
# plot
FeaturePlot(seurat_cluster,
reduction = "umap",
features = c("S.Score", "G2M.Score"),
pt.size = 0.4,
sort.cell = TRUE,
min.cutoff = 'q10',
label = TRUE)
# Defining the information in the seurat object of interest
# Extracting this data from the seurat object
umap_data <- FetchData(seurat_cluster,
vars = c(paste0("PC_", 1:16),
"ident", "UMAP_1", "UMAP_2"))
# Extract the UMAP coordinates for the first 10 cells
#seurat_cluster@reductions$umap@cell.embeddings[1:10, 1:2]
# Adding cluster label to center of cluster on UMAP
umap_label <- FetchData(seurat_cluster,
vars = c("ident", "UMAP_1", "UMAP_2")) %>%
group_by(ident) %>%
summarise(x=mean(UMAP_1), y=mean(UMAP_2))
# Plotting a UMAP plot for each of the PCs
map(paste0("PC_", 1:16), function(pc){
ggplot(umap_data,
aes(UMAP_1, UMAP_2)) +
geom_point(aes_string(color=pc),
alpha = 0.7) +
scale_color_gradient(guide = FALSE,
low = "grey90",
high = "blue") +
geom_text(data=umap_label,
aes(label=ident, x, y)) +
ggtitle(pc)
}) %>%
plot_grid(plotlist = .)
# Explore heatmap of PCs
DimHeatmap(seurat_cluster,
dims = 1:9,
cells = 500,
balanced = TRUE)
# Printing out the most variable genes driving PCs
print(x = seurat_cluster[["pca"]],
dims = 1:9,
nfeatures = 5)
## PC_ 1
## Positive: PLXDC2, DOCK8, ARHGAP15, APBB1IP, LNCAROD
## Negative: FAM155A, FGF14, CSMD1, LRRTM4, OPCML
## PC_ 2
## Positive: ST18, PLP1, CTNNA3, PEX5L, RNF220
## Negative: FRMD4A, PLXDC2, ARHGAP24, LRMDA, SLC8A1
## PC_ 3
## Positive: HPSE2, LINC01088, TRPM3, GPC5, SPARCL1
## Negative: SLC8A1, CNTNAP2, LRRTM4, FRMD4A, CSMD1
## PC_ 4
## Positive: EPAS1, FLT1, COBLL1, CLDN5, ABCB1
## Negative: HPSE2, TRPM3, LINC01088, NRG3, AC012405.1
## PC_ 5
## Positive: ROBO2, SYT1, NRG1, LINGO2, TENM2
## Negative: VCAN, TNR, MEGF11, PTPRZ1, PDZD2
## PC_ 6
## Positive: ATP10A, ELOVL7, BTNL9, ABCB1, ANO2
## Negative: SLC6A12, DCN, NOTCH3, PTH1R, LAMA2
## PC_ 7
## Positive: CFAP54, CFAP299, C8orf34, DNAH11, SPAG17
## Negative: TRPM3, GPC5, OBI1-AS1, LSAMP, SLC1A2
## PC_ 8
## Positive: PLCB1, LINC02055, RORB, OTX2-AS1, SYNPR
## Negative: DPP10, TENM2, DCLK1, RBFOX1, KCNB2
## PC_ 9
## Positive: GPC5, SLC1A2, CACNB2, CABLES1, NRXN1
## Negative: DPP10, CPAMD8, AC012405.1, CD44, DCLK1
Generate cell type-specific clusters and use known markers to determine the
identities of the clusters.
Seurat’s FeaturePlot() function let’s us easily explore the known markers on top of our UMAP visualizations. Let’s go through and determine the identities of the clusters. To access the expression levels of all genes, rather than just the 3000 most highly variable genes, we can use the normalized count data stored in the RNA assay slot.
#---------- Marker_Identification
# Select the RNA counts slot to be the default assay
DefaultAssay(seurat_cluster) <- "RNA"
# Normalize RNA data for visualization purposes
seurat_markers <- NormalizeData(seurat_cluster, verbose = FALSE)
# Create .RData object to load at any time
save(seurat_markers, file="seurat_markers.RData")
#---------- Marker_Identification
## remove the x-axis text and tick
## plot.margin to adjust the white space between each plot.
## ... pass any arguments to VlnPlot in Seurat
vlnplot_gg <- function(obj,
feature,
pt.size = 0,
plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
...){
p <- VlnPlot(obj, features = feature, pt.size = pt.size, ... ) +
xlab("") +
ylab(feature) +
ggtitle("") +
theme(legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = rel(1), angle = 0),
axis.text.y = element_text(size = rel(1)),
plot.margin = plot.margin )
return(p)
}
## extract the max value of the y axis
vlnplot_max<- function(p){
ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range)
return(ceiling(ymax))
}
## main function
vlnplot_stacked <- function(obj, features,
pt.size = 0,
plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
...){
plot_list<- purrr::map(features, function(x) vlnplot_gg(obj = obj,
feature = x, ...))
# Add back x-axis title to bottom plot. patchwork is going to support this?
plot_list[[length(plot_list)]] <- plot_list[[length(plot_list)]] +
theme(axis.text.x=element_text(),
axis.ticks.x = element_line())
# change the y-axis tick to only max value
ymaxs <- purrr::map_dbl(plot_list, vlnplot_max)
plot_list <- purrr::map2(plot_list, ymaxs, function(x,y) x +
scale_y_continuous(breaks = c(y)) +
expand_limits(y = y))
p <- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)
return(p)
}
# Load data
load("seurat_markers.RData")
#---------- UMAP
DimPlot(seurat_markers, reduction="umap", label = TRUE)
# Original 25 dinemensions 0.4 resolution
#---------- Astrocyte_Markers
# AQP4, GFAP (5,10,12,18)
vlnplot_stacked(obj = seurat_markers,
features = c("AQP4", "GFAP"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("AQP4", "GFAP"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- CADPS2
# CADPS2, TIAM1 (27)
vlnplot_stacked(obj = seurat_markers,
features = c("CADPS2", "TIAM1"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("CADPS2", "TIAM1"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Dopaminergic_Markers
# TH, SLC6A3 ()
vlnplot_stacked(obj = seurat_markers,
features = c("TH", "SLC6A3"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("TH", "SLC6A3"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Endothelial_Markers
# FLT1, DUSP1, RGS5, CLDN5 (6)
vlnplot_stacked(obj = seurat_markers,
features = c("FLT1", "DUSP1", "RGS5", "CLDN5"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("FLT1", "DUSP1", "RGS5", "CLDN5"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Ependymal_Markers
# FOXJ1 (21)
vlnplot_stacked(obj = seurat_markers,
features = c("FOXJ1"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("FOXJ1"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Excitatory_Markers
# SATB2, SLC17A6 (13,25)
vlnplot_stacked(obj = seurat_markers,
features = c("SATB2", "SLC17A6"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("SATB2", "SLC17A6"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- GABA_Markers
# GRIK1 (22)
vlnplot_stacked(obj = seurat_markers,
features = c("GRIK1"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("GRIK1"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Inhibitory_Markers
# GAD1, GAD2 (9)
vlnplot_stacked(obj = seurat_markers,
features = c("GAD1", "GAD2"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("GAD1", "GAD2"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Microglia_Markers
# CSF3R, APBB1IP, CD74 (4,11,20,29,30)
vlnplot_stacked(obj = seurat_markers,
features = c("CSF3R", "APBB1IP", "CD74"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("CSF3R", "APBB1IP", "CD74"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Oligodendrocytes_Markers
# MOG, MOBP (0,1,2,3,8,14,15,16,24,26)
vlnplot_stacked(obj = seurat_markers,
features = c("MOG", "MOBP"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("MOG", "MOBP"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- OPC_Markers
# OLIG1, VCAN (7,17)
vlnplot_stacked(obj = seurat_markers,
features = c("OLIG1", "VCAN"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("OLIG1", "VCAN"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Pericyte_Markers
# PDGFRB (19,23,28)
vlnplot_stacked(obj = seurat_markers,
features = c("PDGFRB"))
FeaturePlot(seurat_markers,
reduction = "umap",
features = c("PDGFRB"),
min.cutoff = "q10",
max.cutoff = "q90",
label = TRUE)
#---------- Cluster_Annotation
#manually add annotations derived above
seurat_anno <- RenameIdents(seurat_markers,
`0` = "Oligodendrocytes",
`1` = "Oligodendrocytes",
`2` = "Oligodendrocytes",
`3` = "Oligodendrocytes",
`4` = "Microglia",
`5` = "Astrocytes",
`6` = "Endothelial",
`7` = "OPC",
`8` = "Oligodendrocytes",
`9` = "Inhibitory",
`10` = "Astrocytes",
`11` = "Microglia",
`12` = "Astrocytes",
`13` = "Excitatory",
`14` = "Oligodendrocytes",
`15` = "Oligodendrocytes",
`16` = "Oligodendrocytes",
`17` = "OPC",
`18` = "Astrocytes",
`19` = "Pericytes",
`20` = "Microglia",
`21` = "Ependymal",
`22` = "GABA",
`23` = "Pericytes",
`24` = "Oligodendrocytes",
`25` = "Excitatory",
`26` = "Oligodendrocytes",
`27` = "CADPS2",
`28` = "Pericytes",
`29` = "Microglia",
`30` = "Microglia")
#---------- UMAP
DimPlot(seurat_anno, reduction="umap", label = TRUE)
# save these annotations in meta.data
seurat_anno@meta.data$cluster_annotation <- Idents(seurat_anno)
# Create .RData object to load at any time
save(seurat_anno, file="seurat_anno.RData")
# Load data
load("seurat_anno.RData")
# Remove data
rm(seurat_markers)
# UMAP cell clusters
DimPlot(seurat_anno, reduction="umap", label = TRUE, label.size = 3, repel = TRUE)
# may want to ask if we are truely seperating interneurons from excitatory
DotPlot(seurat_anno,
features = c("AQP4", "GFAP", # Astrocyte_Markers
"CADPS2", "TIAM1", # CADPS2_Markers
"FLT1", "DUSP1", # Endothelial_Markers
"SATB2", "SLC17A7", # Excitatory_Markers
"GRIK1", # GABA_Markers
"GAD1", "GAD2", # Inhibitory_Markers
"CSF3R", "APBB1IP", # Microglia_Markers
"MOG", "MOBP", # Oligodendrocytes_Markers
"OLIG1", "VCAN", # OPC_Markers
"PDGFRB"), # Pericyte_Markers
cols = c("blue", "red"),
dot.scale = 8) +
RotatedAxis()
# If we want to remove a cell cluster
#seurat_subset <- subset(seurat_integrated, idents = "Endothelial_cells", invert = TRUE)
#---------- Cluster_Cell_Counts
# Visualize the number of cell counts per cluster
seurat_anno@meta.data %>%
ggplot(aes(x=cluster_annotation, fill=sample)) +
geom_bar() +
ggtitle("Number of Cells") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
scale_fill_brewer(palette="Dark2")
# percentage of cell per cluster per sample
seurat_anno@meta.data %>%
dplyr::count(cluster_annotation, sample) %>%
dplyr::group_by(sample) %>%
dplyr::mutate(freq = n / sum(n)) %>%
ggplot(aes(x=cluster_annotation, y=freq, fill=sample)) +
geom_bar(position="dodge", stat="identity") +
ggtitle("Percentage of Cells") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
scale_fill_brewer(palette="Dark2")
Identification of all markers for each cluster
This type of analysis is typically recommended for when evaluating a single
sample group/condition. With the FindAllMarkers() function we are comparing each
cluster against all other clusters to identify potential marker genes. The cells
in each cluster are treated as replicates, and essentially a differential
expression analysis is performed with some statistical test. Also, by default
this function will return to you genes that exhibit both positive and negative
expression changes. Typically, we add an argument only.pos to opt for keeping only the positive changes.
Identification of conserved markers in all conditions
Since we have samples representing different conditions in our dataset, our best
option is to find conserved markers. This function internally separates out
cells by sample group/condition, and then performs differential gene expression
testing for a single specified cluster against all other clusters (or a second
cluster, if specified). Gene-level p-values are computed for each condition and
then combined across groups using meta-analysis methods from the MetaDE R
package. Before we start our marker identification we will explicitly set our
default assay, we want to use the original counts and not the integrated data.
DefaultAssay(seurat_anno) <- “RNA”
#---------- Conserved_Markers
# Load data
load("seurat_anno.RData")
# RNA stores the original and normalized counts for finding markers
DefaultAssay(seurat_anno) <- "RNA"
# Ensembl & Symbol Gene IDs
dir <- "counts/"
samples <- list.files(dir)
Gene_Emsembl_Symbol <- read_tsv(paste0(dir,samples[[1]],
"/outs/filtered_feature_bc_matrix/features.tsv.gz"),
col_names = c("Gene_Emsembl","Gene_Symbol","Assay_type"),
col_types="ccc")[,c(1:2)] %>%
unique()
cell_types <- seurat_anno$cluster_annotation %>%
unique() %>% as.character() %>% sort()
#---------- Find Conserved markers across sample groups
get_conserved <- function(seurat_rna,ident_name){
FindConservedMarkers(seurat_rna,
ident.1 = ident_name,
grouping.var = "group",
only.pos = TRUE,
min.diff.pct = 0.1,
min.pct = 0.1,
logfc.threshold = 0.25) %>%
rownames_to_column(var = "Gene_Symbol") %>%
left_join(Gene_Emsembl_Symbol, by="Gene_Symbol") %>%
dplyr::mutate(Conserved_Markers = ident_name)
}
Conserved_Markers <- mclapply(cell_types, function(i) {
get_conserved(seurat_anno,i)
}) %>% bind_rows()
# save the results
write_delim(Conserved_Markers,
"PD_midbrain_snRNAseq_GSE157783_Conserved_Markers.txt",
delim = "\t")
#----- Pathways
get_gprofiler <- function(markers,ident_name){
genes <- markers %>%
dplyr::filter(Conserved_Markers == ident_name)
gost(query = genes$Gene_Symbol,
organism = "hsapiens",
correction_method = "g_SCS")$result %>%
dplyr::filter(p_value <= 0.05) %>%
dplyr::mutate(Conserved_Markers = ident_name) %>%
dplyr::select(-parents)
}
gprofiler_pathways <- mclapply(cell_types, function(i) {
get_gprofiler(Conserved_Markers,i)
}) %>% bind_rows()
# save the results
write_delim(as.data.frame(gprofiler_pathways),
"PD_midbrain_snRNAseq_GSE157783_Conserved_Markers_gprofiler_pathways.txt",
delim = "\t")
# Read Marker files
Conserved_Markers <- read_delim("PD_midbrain_snRNAseq_GSE157783_Conserved_Markers.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# Number of Conserved_Markers
Conserved_Markers %>%
dplyr::count(Conserved_Markers) %>%
kable(caption="Number of Conserved_Markers",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
| Conserved_Markers | n |
|---|---|
| Astrocytes | 850 |
| CADPS2 | 410 |
| Endothelial | 1,335 |
| Ependymal | 1,235 |
| Excitatory | 1,128 |
| GABA | 910 |
| Inhibitory | 1,037 |
| Microglia | 964 |
| Oligodendrocytes | 987 |
| OPC | 809 |
| Pericytes | 1,228 |
# Extract top 10 markers per cluster
Conserved_Markers %>%
group_by(Conserved_Markers) %>%
top_n(n = 10,
wt = max_pval) %>%
kable(caption="Top10 Conserved_Markers",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
| Gene_Symbol | IPD_p_val | IPD_avg_logFC | IPD_pct.1 | IPD_pct.2 | IPD_p_val_adj | HC_p_val | HC_avg_logFC | HC_pct.1 | HC_pct.2 | HC_p_val_adj | max_pval | minimump_p_val | Gene_Emsembl | Conserved_Markers |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SEPTIN9 | 0.00e+00 | 0.4750577 | 0.328 | 0.152 | 0.0000000 | 0.0000000 | 0.3227437 | 0.254 | 0.151 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000184640 | Astrocytes |
| AZIN1-AS1 | 0.00e+00 | 0.3445284 | 0.299 | 0.198 | 0.0000000 | 0.0000000 | 0.5029874 | 0.470 | 0.288 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000253320 | Astrocytes |
| DPH6-DT | 0.00e+00 | 0.4956806 | 0.546 | 0.393 | 0.0000000 | 0.0000000 | 0.4100117 | 0.445 | 0.341 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000248079 | Astrocytes |
| NRP2 | 0.00e+00 | 0.3184634 | 0.286 | 0.183 | 0.0000000 | 0.0000000 | 0.4687946 | 0.348 | 0.181 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000118257 | Astrocytes |
| FAM27C | 0.00e+00 | 0.4698023 | 0.361 | 0.219 | 0.0000000 | 0.0000000 | 0.3636731 | 0.340 | 0.237 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000231527 | Astrocytes |
| GOLIM4 | 0.00e+00 | 0.2800258 | 0.377 | 0.272 | 0.0000000 | 0.0000000 | 0.4300831 | 0.402 | 0.239 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000173905 | Astrocytes |
| AC046134.2 | 0.00e+00 | 0.3517161 | 0.439 | 0.338 | 0.0000000 | 0.0000000 | 0.4073069 | 0.540 | 0.417 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000248932 | Astrocytes |
| LTBP3 | 0.00e+00 | 0.4081220 | 0.344 | 0.225 | 0.0000000 | 0.0000000 | 0.3802234 | 0.348 | 0.247 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000168056 | Astrocytes |
| WDR6 | 0.00e+00 | 0.3676120 | 0.322 | 0.206 | 0.0000000 | 0.0000000 | 0.3226289 | 0.270 | 0.166 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000178252 | Astrocytes |
| DYNC2H1 | 0.00e+00 | 0.2612596 | 0.410 | 0.297 | 0.0000000 | 0.0000000 | 0.3380476 | 0.510 | 0.384 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000187240 | Astrocytes |
| FRY | 0.00e+00 | 1.2180883 | 0.853 | 0.324 | 0.0000000 | 0.3448368 | 0.3441919 | 0.429 | 0.323 | 1.0000000 | 0.3448368 | 0.00e+00 | ENSG00000073910 | CADPS2 |
| ADAM22 | 0.00e+00 | 1.3850225 | 0.800 | 0.321 | 0.0000000 | 0.2203285 | 0.2997259 | 0.500 | 0.388 | 1.0000000 | 0.2203285 | 0.00e+00 | ENSG00000008277 | CADPS2 |
| AC025159.1 | 0.00e+00 | 1.1013532 | 0.520 | 0.220 | 0.0000000 | 0.3039371 | 0.3276836 | 0.429 | 0.321 | 1.0000000 | 0.3039371 | 0.00e+00 | ENSG00000257815 | CADPS2 |
| GSE1 | 0.00e+00 | 0.6835713 | 0.527 | 0.235 | 0.0000000 | 0.2207884 | 0.5067989 | 0.286 | 0.185 | 1.0000000 | 0.2207884 | 0.00e+00 | ENSG00000131149 | CADPS2 |
| NR2F1-AS1 | 0.00e+00 | 0.5435472 | 0.453 | 0.197 | 0.0000000 | 0.2277530 | 0.4900916 | 0.286 | 0.182 | 1.0000000 | 0.2277530 | 0.00e+00 | ENSG00000237187 | CADPS2 |
| PER3 | 0.00e+00 | 0.5172644 | 0.553 | 0.362 | 0.0000002 | 0.7094629 | 0.3009337 | 0.286 | 0.415 | 1.0000000 | 0.7094629 | 0.00e+00 | ENSG00000049246 | CADPS2 |
| PTEN | 0.00e+00 | 0.4477482 | 0.660 | 0.498 | 0.0000078 | 0.9527536 | 0.2537171 | 0.357 | 0.458 | 1.0000000 | 0.9527536 | 0.00e+00 | ENSG00000171862 | CADPS2 |
| ZC3H14 | 0.00e+00 | 0.4839486 | 0.493 | 0.346 | 0.0007162 | 0.2056443 | 0.4187910 | 0.429 | 0.327 | 1.0000000 | 0.2056443 | 1.00e-07 | ENSG00000100722 | CADPS2 |
| SPOCK2 | 0.00e+00 | 0.3636621 | 0.347 | 0.184 | 0.0008550 | 0.2388565 | 0.2522941 | 0.286 | 0.184 | 1.0000000 | 0.2388565 | 1.00e-07 | ENSG00000107742 | CADPS2 |
| KLHL7 | 4.65e-05 | 0.3792577 | 0.347 | 0.231 | 1.0000000 | 0.2342242 | 0.3395831 | 0.357 | 0.245 | 1.0000000 | 0.2342242 | 9.31e-05 | ENSG00000122550 | CADPS2 |
| STXBP6 | 0.00e+00 | 0.3372773 | 0.465 | 0.363 | 0.0000000 | 0.0000000 | 0.6048888 | 0.608 | 0.442 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000168952 | Endothelial |
| MAST4 | 0.00e+00 | 0.3579624 | 0.550 | 0.437 | 0.0000000 | 0.0000000 | 0.4652129 | 0.565 | 0.363 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000069020 | Endothelial |
| ATL3 | 0.00e+00 | 0.2585679 | 0.307 | 0.204 | 0.0000000 | 0.0000000 | 0.3370431 | 0.380 | 0.203 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000184743 | Endothelial |
| FBXW7 | 0.00e+00 | 0.3349889 | 0.486 | 0.385 | 0.0000000 | 0.0000000 | 0.4441103 | 0.537 | 0.371 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000109670 | Endothelial |
| VPS13A | 0.00e+00 | 0.2869240 | 0.385 | 0.274 | 0.0000000 | 0.0000000 | 0.3218606 | 0.378 | 0.240 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000197969 | Endothelial |
| MTUS1 | 0.00e+00 | 0.2518388 | 0.782 | 0.666 | 0.0000000 | 0.0000000 | 0.2896472 | 0.843 | 0.708 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000129422 | Endothelial |
| PSMB1 | 0.00e+00 | 0.2825220 | 0.372 | 0.267 | 0.0000000 | 0.0000000 | 0.2780885 | 0.387 | 0.258 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000008018 | Endothelial |
| PABPC1 | 0.00e+00 | 0.2641459 | 0.491 | 0.384 | 0.0000000 | 0.0000000 | 0.3224899 | 0.442 | 0.321 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000070756 | Endothelial |
| PPIB | 0.00e+00 | 0.2656296 | 0.301 | 0.200 | 0.0000000 | 0.0000000 | 0.2514439 | 0.312 | 0.195 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000166794 | Endothelial |
| UQCRB | 0.00e+00 | 0.2861097 | 0.327 | 0.225 | 0.0000000 | 0.0000000 | 0.2709786 | 0.308 | 0.200 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000156467 | Endothelial |
| ENAH | 0.00e+00 | 0.6169393 | 0.805 | 0.391 | 0.0000000 | 0.0000000 | 0.2683167 | 0.654 | 0.470 | 0.0005474 | 0.0000000 | 0.00e+00 | ENSG00000154380 | Ependymal |
| KANSL1L | 0.00e+00 | 0.3733139 | 0.858 | 0.587 | 0.0000000 | 0.0000000 | 0.2634195 | 0.705 | 0.499 | 0.0000133 | 0.0000000 | 0.00e+00 | ENSG00000144445 | Ependymal |
| AC113414.1 | 0.00e+00 | 0.3731328 | 0.443 | 0.195 | 0.0000000 | 0.0000000 | 0.2700146 | 0.410 | 0.236 | 0.0000515 | 0.0000000 | 0.00e+00 | ENSG00000254186 | Ependymal |
| MYO9A | 0.00e+00 | 0.3565367 | 0.854 | 0.568 | 0.0000000 | 0.0000000 | 0.2643601 | 0.797 | 0.614 | 0.0001034 | 0.0000000 | 0.00e+00 | ENSG00000066933 | Ependymal |
| ZMAT1 | 0.00e+00 | 0.2831260 | 0.628 | 0.323 | 0.0000000 | 0.0000000 | 0.2561522 | 0.645 | 0.444 | 0.0000580 | 0.0000000 | 0.00e+00 | ENSG00000166432 | Ependymal |
| CDH11 | 0.00e+00 | 0.3113416 | 0.579 | 0.293 | 0.0000000 | 0.0000000 | 0.2841062 | 0.622 | 0.411 | 0.0000082 | 0.0000000 | 0.00e+00 | ENSG00000140937 | Ependymal |
| ZNF235 | 0.00e+00 | 0.2827051 | 0.458 | 0.212 | 0.0000000 | 0.0000000 | 0.2788253 | 0.392 | 0.220 | 0.0000089 | 0.0000000 | 0.00e+00 | ENSG00000159917 | Ependymal |
| MARCH6 | 0.00e+00 | 0.2770438 | 0.830 | 0.603 | 0.0000000 | 0.0000000 | 0.2677482 | 0.770 | 0.591 | 0.0000012 | 0.0000000 | 0.00e+00 | ENSG00000145495 | Ependymal |
| TSC22D2 | 0.00e+00 | 0.2624078 | 0.687 | 0.455 | 0.0000000 | 0.0000000 | 0.3138391 | 0.594 | 0.402 | 0.0001398 | 0.0000000 | 0.00e+00 | ENSG00000196428 | Ependymal |
| CHD2 | 0.00e+00 | 0.2534162 | 0.836 | 0.666 | 0.0000000 | 0.0000000 | 0.3078358 | 0.802 | 0.624 | 0.0000095 | 0.0000000 | 0.00e+00 | ENSG00000173575 | Ependymal |
| MT-ND2 | 0.00e+00 | 0.3055538 | 0.969 | 0.857 | 0.0000000 | 0.0000000 | 0.5808768 | 0.976 | 0.829 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000198763 | Excitatory |
| HSP90AB1 | 1.00e-07 | 0.3702898 | 0.823 | 0.625 | 0.0035144 | 0.0000000 | 0.9761000 | 0.789 | 0.534 | 0.0000000 | 0.0000001 | 0.00e+00 | ENSG00000096384 | Excitatory |
| CALM1 | 0.00e+00 | 0.6541679 | 0.901 | 0.638 | 0.0000000 | 0.0000000 | 0.6551169 | 0.867 | 0.697 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000198668 | Excitatory |
| ATP6V0C | 0.00e+00 | 0.4609087 | 0.542 | 0.342 | 0.0000000 | 0.0000000 | 0.6136952 | 0.529 | 0.357 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000185883 | Excitatory |
| AC013287.1 | 0.00e+00 | 0.3626707 | 0.300 | 0.173 | 0.0000000 | 0.0000000 | 0.3617282 | 0.239 | 0.121 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000228566 | Excitatory |
| SERINC1 | 0.00e+00 | 0.4032031 | 0.657 | 0.400 | 0.0000000 | 0.0000000 | 0.4248225 | 0.571 | 0.409 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000111897 | Excitatory |
| TERF2IP | 0.00e+00 | 0.3118220 | 0.658 | 0.416 | 0.0000000 | 0.0000000 | 0.3518871 | 0.660 | 0.470 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000166848 | Excitatory |
| MORF4L1 | 0.00e+00 | 0.2943523 | 0.701 | 0.460 | 0.0000000 | 0.0000000 | 0.3043919 | 0.575 | 0.446 | 0.0000001 | 0.0000000 | 0.00e+00 | ENSG00000185787 | Excitatory |
| ATP1B1 | 0.00e+00 | 0.3565889 | 0.867 | 0.538 | 0.0000000 | 0.0000000 | 0.7406684 | 0.788 | 0.595 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000143153 | Excitatory |
| PPIA | 0.00e+00 | 0.3311437 | 0.571 | 0.362 | 0.0000000 | 0.0000002 | 0.3267490 | 0.481 | 0.375 | 0.0040698 | 0.0000002 | 0.00e+00 | ENSG00000196262 | Excitatory |
| PTPRS | 0.00e+00 | 0.5967663 | 0.869 | 0.432 | 0.0000000 | 0.0000000 | 0.3343587 | 0.661 | 0.480 | 0.0000004 | 0.0000000 | 0.00e+00 | ENSG00000105426 | GABA |
| IDS | 0.00e+00 | 0.5067879 | 0.734 | 0.382 | 0.0000000 | 0.0000000 | 0.3158146 | 0.581 | 0.404 | 0.0000008 | 0.0000000 | 0.00e+00 | ENSG00000010404 | GABA |
| TMEM161B-AS1 | 0.00e+00 | 0.4393899 | 0.910 | 0.630 | 0.0000000 | 0.0000000 | 0.2592223 | 0.819 | 0.636 | 0.0000035 | 0.0000000 | 0.00e+00 | ENSG00000247828 | GABA |
| BACH2 | 0.00e+00 | 0.3974816 | 0.824 | 0.476 | 0.0000000 | 0.0000000 | 0.2761182 | 0.742 | 0.540 | 0.0000019 | 0.0000000 | 0.00e+00 | ENSG00000112182 | GABA |
| DCC | 0.00e+00 | 0.8467024 | 0.443 | 0.209 | 0.0000000 | 0.0000000 | 0.5498832 | 0.403 | 0.237 | 0.0000006 | 0.0000000 | 0.00e+00 | ENSG00000187323 | GABA |
| DOP1A | 0.00e+00 | 0.3012632 | 0.730 | 0.413 | 0.0000000 | 0.0000000 | 0.2796247 | 0.677 | 0.468 | 0.0000002 | 0.0000000 | 0.00e+00 | ENSG00000083097 | GABA |
| PER3 | 0.00e+00 | 0.2977141 | 0.660 | 0.360 | 0.0000000 | 0.0000000 | 0.2852912 | 0.641 | 0.413 | 0.0000002 | 0.0000000 | 0.00e+00 | ENSG00000049246 | GABA |
| RPRD2 | 0.00e+00 | 0.2719440 | 0.803 | 0.497 | 0.0000000 | 0.0000000 | 0.2533176 | 0.726 | 0.517 | 0.0000006 | 0.0000000 | 0.00e+00 | ENSG00000163125 | GABA |
| CLSTN1 | 0.00e+00 | 0.2573252 | 0.652 | 0.410 | 0.0000000 | 0.0000000 | 0.2786382 | 0.597 | 0.401 | 0.0000042 | 0.0000000 | 0.00e+00 | ENSG00000171603 | GABA |
| LUC7L3 | 0.00e+00 | 0.2638336 | 0.947 | 0.746 | 0.0000006 | 0.0000000 | 0.3096814 | 0.887 | 0.783 | 0.0000430 | 0.0000000 | 0.00e+00 | ENSG00000108848 | GABA |
| EHBP1 | 0.00e+00 | 0.3112867 | 0.722 | 0.533 | 0.0000000 | 0.0000000 | 0.4749452 | 0.769 | 0.479 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000115504 | Inhibitory |
| FAM222B | 0.00e+00 | 0.2623521 | 0.614 | 0.405 | 0.0000000 | 0.0000000 | 0.3677829 | 0.680 | 0.418 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000173065 | Inhibitory |
| CTIF | 0.00e+00 | 0.2545172 | 0.675 | 0.474 | 0.0000000 | 0.0000000 | 0.4173490 | 0.701 | 0.473 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000134030 | Inhibitory |
| LRP1B | 0.00e+00 | 0.3631062 | 0.910 | 0.756 | 0.0000000 | 0.0000000 | 0.4498869 | 0.929 | 0.787 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000168702 | Inhibitory |
| CAMK2D | 0.00e+00 | 0.3261693 | 0.746 | 0.572 | 0.0000000 | 0.0000000 | 0.3474505 | 0.755 | 0.541 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000145349 | Inhibitory |
| SUPT3H | 0.00e+00 | 0.2657936 | 0.700 | 0.539 | 0.0000000 | 0.0000000 | 0.3112079 | 0.743 | 0.521 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000196284 | Inhibitory |
| TTN-AS1 | 0.00e+00 | 0.3661652 | 0.452 | 0.310 | 0.0000000 | 0.0000000 | 0.3891230 | 0.535 | 0.340 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000237298 | Inhibitory |
| ACACA | 0.00e+00 | 0.3533133 | 0.734 | 0.576 | 0.0000000 | 0.0000000 | 0.2699299 | 0.769 | 0.606 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000278540 | Inhibitory |
| LARGE1 | 0.00e+00 | 0.3425878 | 0.669 | 0.489 | 0.0000000 | 0.0000000 | 0.3388124 | 0.643 | 0.459 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000133424 | Inhibitory |
| AL589740.1 | 0.00e+00 | 0.7779582 | 0.573 | 0.398 | 0.0000000 | 0.0000000 | 0.4114561 | 0.615 | 0.490 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000271860 | Inhibitory |
| LINC00623 | 0.00e+00 | 0.5235028 | 0.402 | 0.210 | 0.0000000 | 0.0000000 | 0.4282357 | 0.308 | 0.199 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000226067 | Microglia |
| ATP10D | 0.00e+00 | 0.4567753 | 0.389 | 0.227 | 0.0000000 | 0.0000000 | 0.4276893 | 0.308 | 0.202 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000145246 | Microglia |
| CDC14A | 0.00e+00 | 0.3477190 | 0.323 | 0.165 | 0.0000000 | 0.0000000 | 0.3262990 | 0.244 | 0.138 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000079335 | Microglia |
| ARPC3 | 0.00e+00 | 0.4324433 | 0.368 | 0.224 | 0.0000000 | 0.0000000 | 0.4413968 | 0.329 | 0.224 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000111229 | Microglia |
| FLOT1 | 0.00e+00 | 0.4552752 | 0.391 | 0.255 | 0.0000000 | 0.0000000 | 0.4737700 | 0.315 | 0.208 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000137312 | Microglia |
| BAZ1A | 0.00e+00 | 0.4152126 | 0.391 | 0.253 | 0.0000000 | 0.0000000 | 0.4286763 | 0.307 | 0.192 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000198604 | Microglia |
| AL008633.1 | 0.00e+00 | 0.4766687 | 0.227 | 0.114 | 0.0000000 | 0.0000000 | 0.5024777 | 0.212 | 0.111 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000225689 | Microglia |
| OSBPL11 | 0.00e+00 | 0.3141018 | 0.358 | 0.228 | 0.0000000 | 0.0000000 | 0.3102440 | 0.318 | 0.204 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000144909 | Microglia |
| CNTLN | 0.00e+00 | 0.4254490 | 0.367 | 0.250 | 0.0000000 | 0.0000000 | 0.4456522 | 0.341 | 0.231 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000044459 | Microglia |
| SRBD1 | 0.00e+00 | 0.4278456 | 0.321 | 0.212 | 0.0000000 | 0.0000000 | 0.4663284 | 0.330 | 0.228 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000068784 | Microglia |
| AC006148.1 | 0.00e+00 | 0.5980009 | 0.615 | 0.271 | 0.0000000 | 0.0000000 | 0.2562314 | 0.485 | 0.326 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000242593 | Oligodendrocytes |
| TOX3 | 0.00e+00 | 0.5634575 | 0.398 | 0.097 | 0.0000000 | 0.0000000 | 0.2585942 | 0.256 | 0.121 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000103460 | Oligodendrocytes |
| GAS2 | 0.00e+00 | 0.4337700 | 0.378 | 0.129 | 0.0000000 | 0.0000000 | 0.2592877 | 0.327 | 0.188 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000148935 | Oligodendrocytes |
| AC007364.1 | 0.00e+00 | 0.3332576 | 0.309 | 0.161 | 0.0000000 | 0.0000000 | 0.6048222 | 0.480 | 0.237 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000231969 | Oligodendrocytes |
| UBB | 0.00e+00 | 0.2562117 | 0.480 | 0.311 | 0.0000000 | 0.0000000 | 0.3487071 | 0.518 | 0.303 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000170315 | Oligodendrocytes |
| CALN1 | 0.00e+00 | 0.3778386 | 0.519 | 0.282 | 0.0000000 | 0.0000000 | 0.2673905 | 0.510 | 0.361 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000183166 | Oligodendrocytes |
| CYP7B1 | 0.00e+00 | 0.2566274 | 0.312 | 0.175 | 0.0000000 | 0.0000000 | 0.4132043 | 0.463 | 0.275 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000172817 | Oligodendrocytes |
| UTY | 0.00e+00 | 0.2716488 | 0.711 | 0.557 | 0.0000000 | 0.0000000 | 0.3300194 | 0.669 | 0.504 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000183878 | Oligodendrocytes |
| LINC01184 | 0.00e+00 | 0.3072642 | 0.529 | 0.309 | 0.0000000 | 0.0000000 | 0.2637137 | 0.562 | 0.410 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000245937 | Oligodendrocytes |
| NKAIN1 | 0.00e+00 | 0.2928061 | 0.178 | 0.050 | 0.0000000 | 0.0000000 | 0.2869113 | 0.184 | 0.075 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000084628 | Oligodendrocytes |
| EPB41L5 | 0.00e+00 | 0.2572713 | 0.386 | 0.221 | 0.0000000 | 0.0000000 | 0.3103264 | 0.434 | 0.226 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000115109 | OPC |
| SFPQ | 0.00e+00 | 0.2531912 | 0.715 | 0.570 | 0.0000000 | 0.0000000 | 0.3095741 | 0.773 | 0.602 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000116560 | OPC |
| DANT2 | 0.00e+00 | 0.2920226 | 0.553 | 0.409 | 0.0000000 | 0.0000000 | 0.3482132 | 0.724 | 0.536 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000235244 | OPC |
| PHF14 | 0.00e+00 | 0.2640296 | 0.738 | 0.570 | 0.0000000 | 0.0000000 | 0.2918490 | 0.763 | 0.580 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000106443 | OPC |
| AL359232.1 | 0.00e+00 | 0.3453247 | 0.164 | 0.056 | 0.0000000 | 0.0000000 | 0.2807060 | 0.241 | 0.131 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000258561 | OPC |
| SCMH1 | 0.00e+00 | 0.2618803 | 0.598 | 0.387 | 0.0000000 | 0.0000000 | 0.2552713 | 0.625 | 0.426 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000010803 | OPC |
| AC002460.2 | 0.00e+00 | 0.3406536 | 0.288 | 0.135 | 0.0000000 | 0.0000000 | 0.3089465 | 0.286 | 0.165 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000287292 | OPC |
| PTPN4 | 0.00e+00 | 0.2592480 | 0.594 | 0.411 | 0.0000000 | 0.0000000 | 0.2516403 | 0.631 | 0.455 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000088179 | OPC |
| PPA2 | 0.00e+00 | 0.2770489 | 0.663 | 0.478 | 0.0000000 | 0.0000000 | 0.2502462 | 0.624 | 0.464 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000138777 | OPC |
| SUCLG2-AS1 | 0.00e+00 | 0.2922460 | 0.323 | 0.169 | 0.0000000 | 0.0000000 | 0.2617787 | 0.295 | 0.185 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000241316 | OPC |
| RNF144B | 0.00e+00 | 0.2648653 | 0.232 | 0.130 | 0.0000000 | 0.0000000 | 0.5048203 | 0.329 | 0.084 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000137393 | Pericytes |
| CENPP | 0.00e+00 | 0.4318045 | 0.331 | 0.230 | 0.0000000 | 0.0000000 | 0.5721510 | 0.453 | 0.271 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000188312 | Pericytes |
| SEC31A | 0.00e+00 | 0.2831367 | 0.507 | 0.396 | 0.0000000 | 0.0000000 | 0.3559391 | 0.575 | 0.395 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000138674 | Pericytes |
| PLEKHG3 | 0.00e+00 | 0.3573501 | 0.444 | 0.255 | 0.0000000 | 0.0000000 | 0.2749392 | 0.472 | 0.342 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000126822 | Pericytes |
| CREB3L2 | 0.00e+00 | 0.3078524 | 0.432 | 0.306 | 0.0000000 | 0.0000000 | 0.3385584 | 0.469 | 0.295 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000182158 | Pericytes |
| ANO10 | 0.00e+00 | 0.3136331 | 0.493 | 0.372 | 0.0000000 | 0.0000000 | 0.3333278 | 0.510 | 0.338 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000160746 | Pericytes |
| SLC39A10 | 0.00e+00 | 0.2882655 | 0.406 | 0.295 | 0.0000000 | 0.0000000 | 0.4245030 | 0.431 | 0.277 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000196950 | Pericytes |
| EVA1C | 0.00e+00 | 0.2801387 | 0.567 | 0.425 | 0.0000000 | 0.0000000 | 0.2588896 | 0.670 | 0.538 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000166979 | Pericytes |
| SMG6 | 0.00e+00 | 0.3528398 | 0.513 | 0.387 | 0.0000000 | 0.0000000 | 0.2800313 | 0.528 | 0.425 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000070366 | Pericytes |
| TANK | 0.00e+00 | 0.2961007 | 0.540 | 0.418 | 0.0000000 | 0.0000000 | 0.2950809 | 0.541 | 0.405 | 0.0000000 | 0.0000000 | 0.00e+00 | ENSG00000136560 | Pericytes |
# Searchable table
Conserved_Markers %>%
mutate_if(is.numeric, ~round(.,4)) %>%
DT::datatable()
# Read Pathways files
gprofiler_pathways <- read_delim("PD_midbrain_snRNAseq_GSE157783_Conserved_Markers_gprofiler_pathways.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
cell_types <- seurat_anno$cluster_annotation %>%
unique() %>% as.character() %>% sort()
#---------- Conserved_Markers_Gprofiler
gprofiler_top10 <- function(pathway,ident_name){
pathway %>%
dplyr::filter(Conserved_Markers == ident_name) %>%
dplyr::select(Conserved_Markers,
source,
term_id,
term_name,
term_size,
query_size,
intersection_size,
recall,
p_value) %>%
top_n(n = -10, wt = p_value) %>%
kable(caption="Marker Pathways Top10",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
}
gprofiler_top10fig <- function(pathway,ident_name){
pathway %>%
dplyr::filter(Conserved_Markers == ident_name) %>%
dplyr::select(Conserved_Markers,
source,
term_id,
term_name,
term_size,
query_size,
intersection_size,
recall,
p_value) %>%
top_n(n = -10, wt = p_value) %>%
ggplot(aes(x=recall,
y=term_name,
colour=p_value,
size=intersection_size)) +
geom_point() +
expand_limits(x=0) +
labs(x="Hits (%)",
y=ident_name,
colour="p value",
size="Count") +
theme_bw()
}
gprofiler_fig <- function(markers,ident_name){
genes <- markers %>%
dplyr::filter(Conserved_Markers == ident_name)
gost(query = genes$Gene_Symbol,
organism = "hsapiens",
correction_method = "g_SCS") %>%
gostplot(capped = FALSE, interactive = FALSE)
}
mclapply(cell_types, function(i) {
gprofiler_top10(gprofiler_pathways,i)
gprofiler_top10fig(gprofiler_pathways,i)
gprofiler_fig(Conserved_Markers,i)
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
# Searchable table
gprofiler_pathways %>%
dplyr::select(Conserved_Markers,
source,
term_id,
term_name,
term_size,
query_size,
intersection_size,
recall,
p_value) %>%
dplyr::mutate_if(is.numeric, ~round(.,4)) %>%
DT::datatable()
Find DEG between old and young mice per celltype Using 0.5 logFC
#---------- Differential_Expression
# RNA stores the original and normalized counts for finding markers
DefaultAssay(seurat_anno) <- "RNA"
# add group_cluster info to "identity" for each barcoded UMI count
seurat_anno@meta.data$group_cluster <- paste(seurat_anno@meta.data$group, seurat_anno@meta.data$cluster_annotation, sep = "_")
# make this the Idents()
Idents(seurat_anno) <- seurat_anno$group_cluster
#---------- Find Differential expressed genes between groups
get_deg <- function(seurat_rna,ident1,ident2){
FindMarkers(seurat_rna,
ident.1 = ident1,
ident.2 = ident2,
test.use = "MAST",
latent.vars = NULL,
logfc.threshold = 0,
min.pct = 0,
min.diff.pct = -Inf,
min.cells.feature = 10,
min.cells.group = 10,
verbose = FALSE) %>%
rownames_to_column(var = "Gene_Symbol") %>%
left_join(Gene_Emsembl_Symbol, by="Gene_Symbol") %>%
dplyr::mutate(Comparison = paste(ident1, ident2, sep = "_vs_"))
}
#---------- DEG
cell_types <- seurat_anno$cluster_annotation %>%
unique() %>% as.character() %>% sort()
DEG <- mclapply(cell_types, mc.cores = 5, function(i) {
id1 <- paste0("IPD_",i)
id2 <- paste0("HC_",i)
get_deg(seurat_anno,id1,id2)
}) %>% bind_rows()
# save the results
write_delim(DEG,
"PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST.txt",
delim = "\t")
# Read Marker files
DEG <- read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# Number of DEGs
merge(DEG %>%
dplyr::count(Comparison) %>%
dplyr::rename(Total_Genes = n),
DEG %>%
dplyr::filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 |
p_val_adj <= 0.05 & avg_logFC <= -0.25) %>%
dplyr::count(Comparison) %>%
dplyr::rename(Genes_FDR0.05_logFC0.25 = n),
by = "Comparison") %>%
kable(caption="Number of Differentially Expressed Genes",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
| Comparison | Total_Genes | Genes_FDR0.05_logFC0.25 |
|---|---|---|
| IPD_Astrocytes_vs_HC_Astrocytes | 24,297 | 325 |
| IPD_CADPS2_vs_HC_CADPS2 | 17,729 | 18 |
| IPD_Endothelial_vs_HC_Endothelial | 23,545 | 397 |
| IPD_Ependymal_vs_HC_Ependymal | 23,033 | 394 |
| IPD_Excitatory_vs_HC_Excitatory | 24,987 | 218 |
| IPD_GABA_vs_HC_GABA | 22,065 | 142 |
| IPD_Inhibitory_vs_HC_Inhibitory | 24,925 | 150 |
| IPD_Microglia_vs_HC_Microglia | 24,228 | 281 |
| IPD_Oligodendrocytes_vs_HC_Oligodendrocytes | 23,839 | 283 |
| IPD_OPC_vs_HC_OPC | 24,868 | 117 |
| IPD_Pericytes_vs_HC_Pericytes | 23,088 | 262 |
# Extract top 10 DEGs per cluster
DEG %>%
group_by(Comparison) %>%
top_n(n = -10,
wt = p_val_adj) %>%
kable(caption="Differentially Expressed Genes",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
| Gene_Symbol | p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | Gene_Emsembl | Comparison |
|---|---|---|---|---|---|---|---|
| NEAT1 | 0e+00 | 1.0102347 | 0.994 | 0.978 | 0.0000000 | ENSG00000245532 | IPD_Astrocytes_vs_HC_Astrocytes |
| SLC39A11 | 0e+00 | 0.6187040 | 0.934 | 0.855 | 0.0000000 | ENSG00000133195 | IPD_Astrocytes_vs_HC_Astrocytes |
| RANBP3L | 0e+00 | 0.7907816 | 0.730 | 0.506 | 0.0000000 | ENSG00000164188 | IPD_Astrocytes_vs_HC_Astrocytes |
| COL27A1 | 0e+00 | 0.7419343 | 0.671 | 0.377 | 0.0000000 | ENSG00000196739 | IPD_Astrocytes_vs_HC_Astrocytes |
| MRPS6 | 0e+00 | 0.7235911 | 0.454 | 0.153 | 0.0000000 | ENSG00000243927 | IPD_Astrocytes_vs_HC_Astrocytes |
| SLC38A2 | 0e+00 | 0.6803013 | 0.452 | 0.146 | 0.0000000 | ENSG00000134294 | IPD_Astrocytes_vs_HC_Astrocytes |
| PFKP | 0e+00 | 0.6710124 | 0.698 | 0.498 | 0.0000000 | ENSG00000067057 | IPD_Astrocytes_vs_HC_Astrocytes |
| CHI3L1 | 0e+00 | 1.1374594 | 0.497 | 0.243 | 0.0000000 | ENSG00000133048 | IPD_Astrocytes_vs_HC_Astrocytes |
| SMTN | 0e+00 | 0.8327269 | 0.386 | 0.125 | 0.0000000 | ENSG00000183963 | IPD_Astrocytes_vs_HC_Astrocytes |
| LINC00299 | 0e+00 | -0.5882425 | 0.547 | 0.760 | 0.0000000 | ENSG00000236790 | IPD_Astrocytes_vs_HC_Astrocytes |
| RALYL | 0e+00 | 1.8419993 | 0.993 | 0.643 | 0.0000000 | ENSG00000184672 | IPD_CADPS2_vs_HC_CADPS2 |
| ZNF385D | 0e+00 | 1.0845952 | 0.993 | 0.714 | 0.0000040 | ENSG00000151789 | IPD_CADPS2_vs_HC_CADPS2 |
| DLGAP2 | 0e+00 | -1.7565938 | 0.013 | 0.643 | 0.0000167 | ENSG00000198010 | IPD_CADPS2_vs_HC_CADPS2 |
| SH3RF3 | 0e+00 | -0.9779982 | 0.000 | 0.429 | 0.0008484 | ENSG00000172985 | IPD_CADPS2_vs_HC_CADPS2 |
| PTCHD4 | 0e+00 | -1.2098472 | 0.000 | 0.429 | 0.0008484 | ENSG00000244694 | IPD_CADPS2_vs_HC_CADPS2 |
| MALAT1 | 0e+00 | 0.5780715 | 1.000 | 1.000 | 0.0008618 | ENSG00000251562 | IPD_CADPS2_vs_HC_CADPS2 |
| ADARB2 | 0e+00 | -2.5981741 | 0.073 | 0.714 | 0.0013130 | ENSG00000185736 | IPD_CADPS2_vs_HC_CADPS2 |
| CADPS2 | 1e-07 | 0.8430110 | 0.973 | 0.429 | 0.0014255 | ENSG00000081803 | IPD_CADPS2_vs_HC_CADPS2 |
| MSRA | 1e-07 | 1.1843322 | 0.980 | 0.571 | 0.0018409 | ENSG00000175806 | IPD_CADPS2_vs_HC_CADPS2 |
| MT-ND4 | 1e-07 | -1.2351560 | 0.933 | 1.000 | 0.0019402 | ENSG00000198886 | IPD_CADPS2_vs_HC_CADPS2 |
| HSPA1A | 0e+00 | 1.4171708 | 0.733 | 0.315 | 0.0000000 | ENSG00000204389 | IPD_Endothelial_vs_HC_Endothelial |
| HSP90AA1 | 0e+00 | 0.9771887 | 0.887 | 0.784 | 0.0000000 | ENSG00000080824 | IPD_Endothelial_vs_HC_Endothelial |
| HSPH1 | 0e+00 | 0.9525667 | 0.607 | 0.202 | 0.0000000 | ENSG00000120694 | IPD_Endothelial_vs_HC_Endothelial |
| HSPA1B | 0e+00 | 0.9038222 | 0.431 | 0.099 | 0.0000000 | ENSG00000204388 | IPD_Endothelial_vs_HC_Endothelial |
| THSD4 | 0e+00 | -0.8865612 | 0.811 | 0.906 | 0.0000000 | ENSG00000187720 | IPD_Endothelial_vs_HC_Endothelial |
| DNAJA1 | 0e+00 | 0.6755657 | 0.619 | 0.351 | 0.0000000 | ENSG00000086061 | IPD_Endothelial_vs_HC_Endothelial |
| GBP4 | 0e+00 | 1.1350516 | 0.526 | 0.367 | 0.0000000 | ENSG00000162654 | IPD_Endothelial_vs_HC_Endothelial |
| SLC38A2 | 0e+00 | 0.6498719 | 0.712 | 0.452 | 0.0000000 | ENSG00000134294 | IPD_Endothelial_vs_HC_Endothelial |
| HSP90AB1 | 0e+00 | 0.6070880 | 0.807 | 0.718 | 0.0000000 | ENSG00000096384 | IPD_Endothelial_vs_HC_Endothelial |
| CIITA | 0e+00 | 0.6977486 | 0.231 | 0.022 | 0.0000000 | ENSG00000179583 | IPD_Endothelial_vs_HC_Endothelial |
| CFAP157 | 0e+00 | -0.6223796 | 0.926 | 0.954 | 0.0000000 | ENSG00000160401 | IPD_Ependymal_vs_HC_Ependymal |
| PLCG2 | 0e+00 | 1.0578472 | 0.780 | 0.369 | 0.0000000 | ENSG00000197943 | IPD_Ependymal_vs_HC_Ependymal |
| LMO2 | 0e+00 | -0.7900509 | 0.458 | 0.765 | 0.0000000 | ENSG00000135363 | IPD_Ependymal_vs_HC_Ependymal |
| PER1 | 0e+00 | -0.8306501 | 0.471 | 0.756 | 0.0000000 | ENSG00000179094 | IPD_Ependymal_vs_HC_Ependymal |
| SPAG8 | 0e+00 | -0.5624806 | 0.774 | 0.853 | 0.0000000 | ENSG00000137098 | IPD_Ependymal_vs_HC_Ependymal |
| FAM184A | 0e+00 | -0.7290285 | 0.644 | 0.885 | 0.0000000 | ENSG00000111879 | IPD_Ependymal_vs_HC_Ependymal |
| TOB1 | 0e+00 | -0.7759352 | 0.520 | 0.779 | 0.0000000 | ENSG00000141232 | IPD_Ependymal_vs_HC_Ependymal |
| FZD3 | 0e+00 | 0.6237558 | 0.929 | 0.682 | 0.0000000 | ENSG00000104290 | IPD_Ependymal_vs_HC_Ependymal |
| PAPLN | 0e+00 | -0.5012439 | 0.071 | 0.419 | 0.0000000 | ENSG00000100767 | IPD_Ependymal_vs_HC_Ependymal |
| CIRBP | 0e+00 | -0.6191617 | 0.375 | 0.728 | 0.0000000 | ENSG00000099622 | IPD_Ependymal_vs_HC_Ependymal |
| MEIS2 | 0e+00 | 0.8832146 | 0.496 | 0.157 | 0.0000000 | ENSG00000134138 | IPD_Excitatory_vs_HC_Excitatory |
| TFAP2D | 0e+00 | 0.4101668 | 0.292 | 0.037 | 0.0000000 | ENSG00000008197 | IPD_Excitatory_vs_HC_Excitatory |
| LHX9 | 0e+00 | 0.3182558 | 0.396 | 0.113 | 0.0000000 | ENSG00000143355 | IPD_Excitatory_vs_HC_Excitatory |
| SGCZ | 0e+00 | 0.9482440 | 0.805 | 0.549 | 0.0000000 | ENSG00000185053 | IPD_Excitatory_vs_HC_Excitatory |
| EBF3 | 0e+00 | 0.4825732 | 0.448 | 0.162 | 0.0000000 | ENSG00000108001 | IPD_Excitatory_vs_HC_Excitatory |
| MT-CO1 | 0e+00 | -0.5908875 | 0.975 | 0.994 | 0.0000000 | ENSG00000198804 | IPD_Excitatory_vs_HC_Excitatory |
| AC119673.2 | 0e+00 | 0.4238447 | 0.404 | 0.159 | 0.0000000 | ENSG00000286619 | IPD_Excitatory_vs_HC_Excitatory |
| CPNE4 | 0e+00 | -0.8363654 | 0.417 | 0.585 | 0.0000000 | ENSG00000196353 | IPD_Excitatory_vs_HC_Excitatory |
| EBF1 | 0e+00 | 0.7749330 | 0.404 | 0.160 | 0.0000000 | ENSG00000164330 | IPD_Excitatory_vs_HC_Excitatory |
| ANK1 | 0e+00 | 0.3258161 | 0.765 | 0.485 | 0.0000000 | ENSG00000029534 | IPD_Excitatory_vs_HC_Excitatory |
| PTPRT | 0e+00 | 0.8162530 | 0.967 | 0.722 | 0.0000000 | ENSG00000196090 | IPD_GABA_vs_HC_GABA |
| KCNQ1OT1 | 0e+00 | -0.8697001 | 0.734 | 0.871 | 0.0000000 | ENSG00000269821 | IPD_GABA_vs_HC_GABA |
| NEGR1 | 0e+00 | 0.7371908 | 0.963 | 0.847 | 0.0000000 | ENSG00000172260 | IPD_GABA_vs_HC_GABA |
| KCTD8 | 0e+00 | 0.8975399 | 0.910 | 0.665 | 0.0000000 | ENSG00000183783 | IPD_GABA_vs_HC_GABA |
| RALYL | 0e+00 | 0.4899623 | 0.984 | 0.984 | 0.0000000 | ENSG00000184672 | IPD_GABA_vs_HC_GABA |
| AHI1 | 0e+00 | -0.5715830 | 0.803 | 0.911 | 0.0000000 | ENSG00000135541 | IPD_GABA_vs_HC_GABA |
| SDK1 | 0e+00 | 0.6983150 | 0.943 | 0.786 | 0.0000000 | ENSG00000146555 | IPD_GABA_vs_HC_GABA |
| LINC01122 | 0e+00 | 0.6490492 | 0.881 | 0.629 | 0.0000000 | ENSG00000233723 | IPD_GABA_vs_HC_GABA |
| CCDC146 | 0e+00 | -0.7281565 | 0.402 | 0.641 | 0.0000000 | ENSG00000135205 | IPD_GABA_vs_HC_GABA |
| LDB2 | 0e+00 | 0.5049193 | 0.918 | 0.665 | 0.0000000 | ENSG00000169744 | IPD_GABA_vs_HC_GABA |
| DLX6-AS1 | 0e+00 | -0.6371891 | 0.001 | 0.227 | 0.0000000 | ENSG00000231764 | IPD_Inhibitory_vs_HC_Inhibitory |
| TFAP2B | 0e+00 | 0.4487000 | 0.246 | 0.058 | 0.0000000 | ENSG00000008196 | IPD_Inhibitory_vs_HC_Inhibitory |
| GATA3 | 0e+00 | 0.3046156 | 0.315 | 0.100 | 0.0000000 | ENSG00000107485 | IPD_Inhibitory_vs_HC_Inhibitory |
| DLX1 | 0e+00 | -0.1335462 | 0.000 | 0.108 | 0.0000000 | ENSG00000144355 | IPD_Inhibitory_vs_HC_Inhibitory |
| FAM27C | 0e+00 | -0.4504831 | 0.414 | 0.613 | 0.0000000 | ENSG00000231527 | IPD_Inhibitory_vs_HC_Inhibitory |
| LINC01210 | 0e+00 | 0.3192183 | 0.235 | 0.059 | 0.0000000 | ENSG00000239513 | IPD_Inhibitory_vs_HC_Inhibitory |
| MT-CO1 | 0e+00 | -0.4595117 | 0.954 | 0.989 | 0.0000000 | ENSG00000198804 | IPD_Inhibitory_vs_HC_Inhibitory |
| MT-ND4 | 0e+00 | -0.4393826 | 0.974 | 0.992 | 0.0000000 | ENSG00000198886 | IPD_Inhibitory_vs_HC_Inhibitory |
| AL592183.1 | 0e+00 | 0.3374974 | 0.451 | 0.229 | 0.0000000 | ENSG00000273748 | IPD_Inhibitory_vs_HC_Inhibitory |
| AC119673.2 | 0e+00 | 0.3859995 | 0.321 | 0.133 | 0.0000000 | ENSG00000286619 | IPD_Inhibitory_vs_HC_Inhibitory |
| AP001977.1 | 0e+00 | 1.2216857 | 0.384 | 0.062 | 0.0000000 | ENSG00000286044 | IPD_Microglia_vs_HC_Microglia |
| CHI3L1 | 0e+00 | 0.8058965 | 0.336 | 0.037 | 0.0000000 | ENSG00000133048 | IPD_Microglia_vs_HC_Microglia |
| LNCAROD | 0e+00 | 0.7205218 | 0.891 | 0.702 | 0.0000000 | ENSG00000231131 | IPD_Microglia_vs_HC_Microglia |
| LINC02397 | 0e+00 | 0.7770716 | 0.377 | 0.064 | 0.0000000 | ENSG00000205056 | IPD_Microglia_vs_HC_Microglia |
| HSPH1 | 0e+00 | 0.3275040 | 0.481 | 0.213 | 0.0000000 | ENSG00000120694 | IPD_Microglia_vs_HC_Microglia |
| DIRC3 | 0e+00 | 0.9870970 | 0.322 | 0.128 | 0.0000000 | ENSG00000231672 | IPD_Microglia_vs_HC_Microglia |
| NEAT1 | 0e+00 | 0.3315993 | 0.986 | 0.980 | 0.0000000 | ENSG00000245532 | IPD_Microglia_vs_HC_Microglia |
| SLC38A2 | 0e+00 | 0.4341306 | 0.557 | 0.328 | 0.0000000 | ENSG00000134294 | IPD_Microglia_vs_HC_Microglia |
| PTGDS | 0e+00 | -0.6524033 | 0.232 | 0.398 | 0.0000000 | ENSG00000107317 | IPD_Microglia_vs_HC_Microglia |
| ANK2 | 0e+00 | -0.5692483 | 0.326 | 0.489 | 0.0000000 | ENSG00000145362 | IPD_Microglia_vs_HC_Microglia |
| NEAT1 | 0e+00 | 0.8088482 | 0.985 | 0.953 | 0.0000000 | ENSG00000245532 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CDKL1 | 0e+00 | 0.8000008 | 0.690 | 0.324 | 0.0000000 | ENSG00000100490 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| HSPA1A | 0e+00 | 0.7970088 | 0.603 | 0.312 | 0.0000000 | ENSG00000204389 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| MSRA | 0e+00 | 0.7804543 | 0.574 | 0.287 | 0.0000000 | ENSG00000175806 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| LUCAT1 | 0e+00 | 0.7771406 | 0.402 | 0.172 | 0.0000000 | ENSG00000248323 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SLC35F3 | 0e+00 | 0.6939169 | 0.472 | 0.206 | 0.0000000 | ENSG00000183780 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SLC38A2 | 0e+00 | 0.6693157 | 0.747 | 0.449 | 0.0000000 | ENSG00000134294 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| OGFRL1 | 0e+00 | 0.6056463 | 0.267 | 0.090 | 0.0000000 | ENSG00000119900 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| S100A6 | 0e+00 | 0.5979215 | 0.317 | 0.085 | 0.0000000 | ENSG00000197956 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SLC39A11 | 0e+00 | 0.5349573 | 0.943 | 0.832 | 0.0000000 | ENSG00000133195 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| GRID2 | 0e+00 | 0.5324880 | 0.629 | 0.301 | 0.0000000 | ENSG00000152208 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SELENOP | 0e+00 | 0.5289287 | 0.836 | 0.743 | 0.0000000 | ENSG00000250722 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| ANKRD18A | 0e+00 | 0.5286409 | 0.653 | 0.434 | 0.0000000 | ENSG00000180071 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| JAKMIP3 | 0e+00 | 0.5111564 | 0.726 | 0.533 | 0.0000000 | ENSG00000188385 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| FMO5 | 0e+00 | 0.5056828 | 0.371 | 0.166 | 0.0000000 | ENSG00000131781 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SEM1 | 0e+00 | 0.4968863 | 0.644 | 0.419 | 0.0000000 | ENSG00000127922 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| HSP90AA1 | 0e+00 | 0.4951208 | 0.950 | 0.899 | 0.0000000 | ENSG00000080824 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| USP31 | 0e+00 | 0.4859553 | 0.743 | 0.540 | 0.0000000 | ENSG00000103404 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| TMPRSS5 | 0e+00 | 0.4818362 | 0.360 | 0.140 | 0.0000000 | ENSG00000166682 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| FBXO32 | 0e+00 | 0.4788437 | 0.671 | 0.456 | 0.0000000 | ENSG00000156804 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CHORDC1 | 0e+00 | 0.4729798 | 0.514 | 0.236 | 0.0000000 | ENSG00000110172 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| TPD52L1 | 0e+00 | 0.4675854 | 0.191 | 0.012 | 0.0000000 | ENSG00000111907 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| HSPH1 | 0e+00 | 0.4633020 | 0.431 | 0.183 | 0.0000000 | ENSG00000120694 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| FANCC | 0e+00 | 0.4617200 | 0.459 | 0.219 | 0.0000000 | ENSG00000158169 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| FKBP5 | 0e+00 | 0.4598565 | 0.822 | 0.656 | 0.0000000 | ENSG00000096060 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| ERBIN | 0e+00 | 0.4459280 | 0.985 | 0.970 | 0.0000000 | ENSG00000112851 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| HSPA1B | 0e+00 | 0.4374756 | 0.338 | 0.107 | 0.0000000 | ENSG00000204388 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CPQ | 0e+00 | 0.4342444 | 0.856 | 0.711 | 0.0000000 | ENSG00000104324 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PDE4DIP | 0e+00 | 0.4266398 | 0.789 | 0.635 | 0.0000000 | ENSG00000178104 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| QDPR | 0e+00 | 0.4217043 | 0.949 | 0.877 | 0.0000000 | ENSG00000151552 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PTGES3 | 0e+00 | 0.4215813 | 0.645 | 0.439 | 0.0000000 | ENSG00000110958 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SEPTIN8 | 0e+00 | 0.3997771 | 0.775 | 0.621 | 0.0000000 | ENSG00000164402 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| FBXO2 | 0e+00 | 0.3931558 | 0.306 | 0.079 | 0.0000000 | ENSG00000116661 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| MAP2K4 | 0e+00 | 0.3839903 | 0.761 | 0.582 | 0.0000000 | ENSG00000065559 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| MRPS6 | 0e+00 | 0.3179451 | 0.211 | 0.029 | 0.0000000 | ENSG00000243927 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SLC44A1 | 0e+00 | 0.2767122 | 0.994 | 0.992 | 0.0000000 | ENSG00000070214 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CTNNA3 | 0e+00 | -0.2541831 | 0.983 | 0.991 | 0.0000000 | ENSG00000183230 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| TRIM2 | 0e+00 | -0.3134105 | 0.948 | 0.970 | 0.0000000 | ENSG00000109654 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PTK2 | 0e+00 | -0.3296688 | 0.931 | 0.961 | 0.0000000 | ENSG00000169398 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| LUC7L2 | 0e+00 | -0.3739440 | 0.609 | 0.744 | 0.0000000 | ENSG00000146963 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| AATK | 0e+00 | -0.3884724 | 0.817 | 0.902 | 0.0000000 | ENSG00000181409 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| SIK3 | 0e+00 | -0.4061477 | 0.974 | 0.981 | 0.0000000 | ENSG00000160584 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| ANK2 | 0e+00 | -0.4381293 | 0.935 | 0.967 | 0.0000000 | ENSG00000145362 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| ZFYVE16 | 0e+00 | -0.4382659 | 0.772 | 0.889 | 0.0000000 | ENSG00000039319 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PLEKHH1 | 0e+00 | -0.4516099 | 0.892 | 0.957 | 0.0000000 | ENSG00000054690 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| KCNH8 | 0e+00 | -0.4794693 | 0.780 | 0.886 | 0.0000000 | ENSG00000183960 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PPP2R2B | 0e+00 | -0.4893031 | 0.956 | 0.983 | 0.0000000 | ENSG00000156475 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CARNS1 | 0e+00 | -0.5170598 | 0.626 | 0.821 | 0.0000000 | ENSG00000172508 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| LINC01949 | 0e+00 | -0.5237451 | 0.121 | 0.358 | 0.0000000 | ENSG00000233828 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| CIRBP | 0e+00 | -0.5557987 | 0.420 | 0.699 | 0.0000000 | ENSG00000099622 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| PALM2-AKAP2 | 0e+00 | -0.5633929 | 0.616 | 0.783 | 0.0000000 | ENSG00000157654 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| POLR2F | 0e+00 | -0.5739424 | 0.708 | 0.886 | 0.0000000 | ENSG00000100142 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| AC006059.1 | 0e+00 | -0.5745828 | 0.352 | 0.525 | 0.0000000 | ENSG00000230084 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| MOBP | 0e+00 | -0.5780183 | 0.907 | 0.968 | 0.0000000 | ENSG00000168314 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| NAALADL2 | 0e+00 | -0.5888721 | 0.546 | 0.771 | 0.0000000 | ENSG00000177694 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| TP53TG5 | 0e+00 | -0.6070992 | 0.364 | 0.650 | 0.0000000 | ENSG00000124251 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| AL359091.1 | 0e+00 | -0.7824083 | 0.521 | 0.759 | 0.0000000 | ENSG00000207955 | IPD_Oligodendrocytes_vs_HC_Oligodendrocytes |
| C1QL1 | 0e+00 | -0.5296440 | 0.276 | 0.566 | 0.0000000 | ENSG00000131094 | IPD_OPC_vs_HC_OPC |
| SEM1 | 0e+00 | 0.5126273 | 0.652 | 0.407 | 0.0000000 | ENSG00000127922 | IPD_OPC_vs_HC_OPC |
| XYLT1 | 0e+00 | -0.4316300 | 0.920 | 0.957 | 0.0000000 | ENSG00000103489 | IPD_OPC_vs_HC_OPC |
| EPN2 | 0e+00 | -0.3863809 | 0.936 | 0.970 | 0.0000000 | ENSG00000072134 | IPD_OPC_vs_HC_OPC |
| HSP90AA1 | 0e+00 | 0.4079712 | 0.771 | 0.623 | 0.0000000 | ENSG00000080824 | IPD_OPC_vs_HC_OPC |
| ZNF365 | 0e+00 | 0.4442608 | 0.572 | 0.359 | 0.0000000 | ENSG00000138311 | IPD_OPC_vs_HC_OPC |
| TPT1 | 0e+00 | 0.4680811 | 0.487 | 0.297 | 0.0000000 | ENSG00000133112 | IPD_OPC_vs_HC_OPC |
| ANKRD10 | 0e+00 | 0.4842279 | 0.636 | 0.526 | 0.0000000 | ENSG00000088448 | IPD_OPC_vs_HC_OPC |
| AL592183.1 | 0e+00 | 0.3508305 | 0.484 | 0.267 | 0.0000000 | ENSG00000273748 | IPD_OPC_vs_HC_OPC |
| RPL37A | 0e+00 | 0.4415866 | 0.576 | 0.415 | 0.0000000 | ENSG00000197756 | IPD_OPC_vs_HC_OPC |
| HSPA1A | 0e+00 | 0.9833418 | 0.620 | 0.275 | 0.0000000 | ENSG00000204389 | IPD_Pericytes_vs_HC_Pericytes |
| TAGLN | 0e+00 | -1.3808185 | 0.329 | 0.646 | 0.0000000 | ENSG00000149591 | IPD_Pericytes_vs_HC_Pericytes |
| SLC38A2 | 0e+00 | 0.6654154 | 0.857 | 0.709 | 0.0000000 | ENSG00000134294 | IPD_Pericytes_vs_HC_Pericytes |
| ACTB | 0e+00 | -0.5957797 | 0.869 | 0.940 | 0.0000000 | ENSG00000075624 | IPD_Pericytes_vs_HC_Pericytes |
| ADAMTS9-AS2 | 0e+00 | 0.5866795 | 0.901 | 0.679 | 0.0000000 | ENSG00000241684 | IPD_Pericytes_vs_HC_Pericytes |
| HSP90AA1 | 0e+00 | 0.5152927 | 0.889 | 0.842 | 0.0000000 | ENSG00000080824 | IPD_Pericytes_vs_HC_Pericytes |
| AKR1C1 | 0e+00 | 0.5646259 | 0.341 | 0.087 | 0.0000000 | ENSG00000187134 | IPD_Pericytes_vs_HC_Pericytes |
| LINC00882 | 0e+00 | 0.7375457 | 0.577 | 0.301 | 0.0000000 | ENSG00000242759 | IPD_Pericytes_vs_HC_Pericytes |
| RANBP3L | 0e+00 | 0.6746139 | 0.454 | 0.177 | 0.0000000 | ENSG00000164188 | IPD_Pericytes_vs_HC_Pericytes |
| RIPOR3 | 0e+00 | 0.4905358 | 0.808 | 0.596 | 0.0000000 | ENSG00000042062 | IPD_Pericytes_vs_HC_Pericytes |
# Searchable table
DEG %>%
filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 |
p_val_adj <= 0.05 & avg_logFC <= -0.25) %>%
select(Comparison,
Gene_Symbol,
Gene_Emsembl,
pct.1,
pct.2,
p_val,
p_val_adj,
avg_logFC) %>%
mutate_if(is.numeric, ~round(.,4)) %>%
DT::datatable()
Senescence Pathways
# Read Marker files
DEG_sig <- DEG %>% filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 |
p_val_adj <= 0.05 & avg_logFC <= -0.25)
# Senescence_Pathways
comparison_names = unique(DEG$Comparison)
pathways_names = list.files("../Senescence/human/")
senescence_enrichment <- mclapply(comparison_names, function(i) {
mclapply(pathways_names, function(p) {
pathway <- read_delim(paste0("../Senescence/human/",p),
"\t", escape_double = FALSE, trim_ws = TRUE)
genes <- DEG %>% filter(Comparison == i) %>% nrow()
sig_genes <- DEG_sig %>% filter(Comparison == i) %>% nrow()
pathway_back_genes <- DEG %>% filter(Comparison == i) %>%
filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
pathway_sig_genes <- DEG_sig %>% filter(Comparison == i) %>%
filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
enrichment <- data.frame(rbind(Background=c(genes-sig_genes, sig_genes),
Foreground= c(pathway_back_genes-pathway_sig_genes, pathway_sig_genes)))
names(enrichment) <- c("Nonsignificant","Significant")
data.frame("Comparison" = i,
"Genes" = genes,
"Sig_Genes" = sig_genes,
"Pathway" = p,
"Pathway_Genes" = nrow(pathway),
"Pathway_Back_Genes" = pathway_back_genes,
"Pathway_Sig_Genes" = pathway_sig_genes,
"Fisher_Pvalue" = fisher.test(enrichment, alternative="greater")$p.value,
"Fisher_OddsRatio" = fisher.test(enrichment, alternative="greater")$estimate)
})
}) %>% bind_rows()
# save the results
write_delim(senescence_enrichment,
"PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Senescence_Pathways.txt",
delim = "\t")
# Read Senescence_Pathways
senescence_enrichment <-
read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Senescence_Pathways.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# Searchable table
senescence_enrichment %>%
arrange(Fisher_Pvalue) %>%
mutate(Fisher_Pvalue = round(Fisher_Pvalue, 4),
Fisher_OddsRatio = round(Fisher_OddsRatio, 2)) %>%
DT::datatable()
Lysosome Pathways
# Read Marker files
DEG_sig <- DEG %>% filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 |
p_val_adj <= 0.05 & avg_logFC <= -0.25)
# Lysosome_Pathways
comparison_names <- unique(DEG$Comparison)
pathways_names <- list.files("lysosomal_pathways/")
lysosome_enrichment <- mclapply(comparison_names, function(i) {
mclapply(pathways_names, function(p) {
pathway <- read_delim(paste0("lysosomal_pathways/",p),
"\t", escape_double = FALSE, trim_ws = TRUE)
genes <- DEG %>% filter(Comparison == i) %>% nrow()
sig_genes <- DEG_sig %>% filter(Comparison == i) %>% nrow()
pathway_back_genes <- DEG %>% filter(Comparison == i) %>%
filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
pathway_sig_genes <- DEG_sig %>% filter(Comparison == i) %>%
filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
enrichment <- data.frame(rbind(Background=c(genes-sig_genes, sig_genes),
Foreground= c(pathway_back_genes-pathway_sig_genes, pathway_sig_genes)))
names(enrichment) <- c("Nonsignificant","Significant")
data.frame("Comparison" = i,
"Genes" = genes,
"Sig_Genes" = sig_genes,
"Pathway" = p,
"Pathway_Genes" = nrow(pathway),
"Pathway_Back_Genes" = pathway_back_genes,
"Pathway_Sig_Genes" = pathway_sig_genes,
"Fisher_Pvalue" = fisher.test(enrichment, alternative="greater")$p.value,
"Fisher_OddsRatio" = fisher.test(enrichment, alternative="greater")$estimate)
})
}) %>% bind_rows()
# save the results
write_delim(lysosome_enrichment,
"PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Lysosome_Pathways.txt",
delim = "\t")
# Read Senescence_Pathways
lysosome_enrichment <-
read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Lysosome_Pathways.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# Searchable table
lysosome_enrichment %>%
arrange(Fisher_Pvalue) %>%
mutate(Fisher_Pvalue = round(Fisher_Pvalue, 4),
Fisher_OddsRatio = round(Fisher_OddsRatio, 2)) %>%
DT::datatable()
#---------- Differential_Expression_GSEA
# EDIT GMT FILE cut -f 2 -d$'\t' --complement
gmt_file <- gmtPathways("Human_GOBP_AllPathways_no_GO_iea_January_13_2021_symbol_fgsea.gmt")
comparison_names <- unique(DEG$Comparison)
fgsea_enrichment <- mclapply(comparison_names, function(i) {
rnk_file <- DEG %>%
dplyr::filter(Comparison == i) %>%
dplyr::select(Comparison,
Gene_Symbol,
p_val,
avg_logFC) %>%
dplyr::mutate(sign_log10Pvalue = sign(avg_logFC) * -log10(p_val)) %>%
dplyr::arrange(sign_log10Pvalue)
rnk_file <- setNames(rnk_file$sign_log10Pvalue, rnk_file$Gene_Symbol)
fgsea_rnk <- fgsea(gmt_file, rnk_file, nperm=1000, minSize=10, maxSize=500)
fgsea_rnk %>%
dplyr::mutate(Comparison = i) %>%
dplyr::select(-leadingEdge)
}) %>% bind_rows()
# save the results
write_delim(fgsea_enrichment,
"PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_fgsea_enrichment.txt",
delim = "\t")
# Read fgsea_enrichment
fgsea_enrichment <-
read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_fgsea_enrichment.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# Number of Pathways
fgsea_enrichment %>%
dplyr::filter(padj <= 0.05) %>%
dplyr::count(Comparison) %>%
dplyr::rename(Pathways_FDR0.05 = n) %>%
kable(caption="Number of Pathways",
format.args = list(big.mark = ",")) %>%
kable_styling(c("striped", "bordered"))
| Comparison | Pathways_FDR0.05 |
|---|---|
# Searchable table
fgsea_enrichment %>%
dplyr::filter(padj <= 0.05) %>%
dplyr::arrange(padj) %>%
dplyr::select(Comparison,
pathway,
pval,
padj,
ES,
NES) %>%
mutate_if(is.numeric, ~round(.,4)) %>%
DT::datatable()
Cdkn2a used as a marker for senescence
# Cell clusters
Idents(seurat_anno) <- seurat_anno@meta.data$cluster_annotation
# UMAP cell clusters
DimPlot(seurat_anno,
reduction="umap",
label = TRUE,
label.size = 3,
repel = TRUE,
split.by = "group")
# Cellular Senescence Markers p16-Ink4a
FeaturePlot(seurat_anno,
reduction = "umap",
features = c("CDKN2A"),
split.by = "group",
label = FALSE)
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = CDKN2A > 1,
slot = 'data')) <- 'CDKN2A_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = CDKN2A <= 1,
slot = 'data')) <- 'CDKN2A_neg'
seurat_anno@meta.data$CDKN2A <- Idents(seurat_anno)
cell_types <- unique(seurat_anno$cluster_annotation)
CDKN2A <- lapply(cell_types, function(i) {
IPD_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
CDKN2A == "CDKN2A_neg") %>% nrow()
IPD_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
CDKN2A == "CDKN2A_pos") %>% nrow()
HC_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
CDKN2A == "CDKN2A_neg") %>% nrow()
HC_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
CDKN2A == "CDKN2A_pos") %>% nrow()
data.frame("Comparison" = i,
"IPD_CDKN2A_neg" = IPD_neg,
"IPD_CDKN2A_pos" = IPD_pos,
"HC_CDKN2A_neg" = HC_neg,
"HC_CDKN2A_pos" = HC_pos,
"IPD_CDKN2A_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
"HC_CDKN2A_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()
CDKN2A %>%
mutate(IPD_CDKN2A_Percentage = round(IPD_CDKN2A_Percentage, 4),
HC_CDKN2A_Percentage = round(HC_CDKN2A_Percentage, 4)) %>%
DT::datatable()
# Boxplot of CDKN2A
df <- cbind(seurat_anno@meta.data %>%
dplyr::select(cluster_annotation, CDKN2A, group, sample),
data = seurat_anno@assays$RNA@data["CDKN2A",])
ggboxplot(df, x = "group", y = "data",
color = "group", palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group", size = 0.2,
title = "CDKN2A normalized counts", facet.by = "cluster_annotation")
#---------- Senescence Markers p21 Wafl/Clp1/Sdi1
FeaturePlot(seurat_anno,
reduction = "umap",
features = c("CDKN1A"),
split.by = "group",
label = FALSE)
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = CDKN1A > 1,
slot = 'data')) <- 'CDKN1A_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = CDKN1A <= 1,
slot = 'data')) <- 'CDKN1A_neg'
seurat_anno@meta.data$CDKN1A <- Idents(seurat_anno)
cell_types <- unique(seurat_anno$cluster_annotation)
CDKN1A <- lapply(cell_types, function(i) {
IPD_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
CDKN1A == "CDKN1A_neg") %>% nrow()
IPD_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
CDKN1A == "CDKN1A_pos") %>% nrow()
HC_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
CDKN1A == "CDKN1A_neg") %>% nrow()
HC_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
CDKN1A == "CDKN1A_pos") %>% nrow()
data.frame("Comparison" = i,
"IPD_CDKN1A_neg" = IPD_neg,
"IPD_CDKN1A_pos" = IPD_pos,
"HC_CDKN1A_neg" = HC_neg,
"HC_CDKN1A_pos" = HC_pos,
"IPD_CDKN1A_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
"HC_CDKN1A_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()
CDKN1A %>%
mutate(IPD_CDKN1A_Percentage = round(IPD_CDKN1A_Percentage, 4),
HC_CDKN1A_Percentage = round(HC_CDKN1A_Percentage, 4)) %>%
DT::datatable()
# Boxplot of CDKN1A
df <- cbind(seurat_anno@meta.data %>%
dplyr::select(cluster_annotation, CDKN1A, group, sample),
data = seurat_anno@assays$RNA@data["CDKN1A",])
ggboxplot(df, x = "group", y = "data",
color = "group", palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group", size = 0.2,
title = "CDKN1A normalized counts", facet.by = "cluster_annotation")
#---------- Senescence Markers p53 TP53
FeaturePlot(seurat_anno,
reduction = "umap",
features = c("TP53"),
split.by = "group",
label = FALSE)
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = TP53 > 1,
slot = 'data')) <- 'TP53_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = TP53 <= 1,
slot = 'data')) <- 'TP53_neg'
seurat_anno@meta.data$TP53 <- Idents(seurat_anno)
cell_types <- unique(seurat_anno$cluster_annotation)
TP53 <- lapply(cell_types, function(i) {
IPD_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
TP53 == "TP53_neg") %>% nrow()
IPD_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
TP53 == "TP53_pos") %>% nrow()
HC_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
TP53 == "TP53_neg") %>% nrow()
HC_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
TP53 == "TP53_pos") %>% nrow()
data.frame("Comparison" = i,
"IPD_TP53_neg" = IPD_neg,
"IPD_TP53_pos" = IPD_pos,
"HC_TP53_neg" = HC_neg,
"HC_TP53_pos" = HC_pos,
"IPD_TP53_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
"HC_TP53_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()
TP53 %>%
mutate(IPD_TP53_Percentage = round(IPD_TP53_Percentage, 4),
HC_TP53_Percentage = round(HC_TP53_Percentage, 4)) %>%
DT::datatable()
# Boxplot of TP53
df <- cbind(seurat_anno@meta.data %>%
dplyr::select(cluster_annotation, TP53, group, sample),
data = seurat_anno@assays$RNA@data["TP53",])
ggboxplot(df, x = "group", y = "data",
color = "group", palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group", size = 0.2,
title = "TP53 normalized counts", facet.by = "cluster_annotation")
#---------- Senescence Markers Beta-galactosidase GLB1
FeaturePlot(seurat_anno,
reduction = "umap",
features = c("GLB1"),
split.by = "group",
label = FALSE)
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = GLB1 > 1,
slot = 'data')) <- 'GLB1_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno,
expression = GLB1 <= 1,
slot = 'data')) <- 'GLB1_neg'
seurat_anno@meta.data$GLB1 <- Idents(seurat_anno)
cell_types <- unique(seurat_anno$cluster_annotation)
GLB1 <- lapply(cell_types, function(i) {
IPD_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
GLB1 == "GLB1_neg") %>% nrow()
IPD_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "IPD" &
GLB1 == "GLB1_pos") %>% nrow()
HC_neg <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
GLB1 == "GLB1_neg") %>% nrow()
HC_pos <- seurat_anno@meta.data %>%
filter(cluster_annotation == i & group == "HC" &
GLB1 == "GLB1_pos") %>% nrow()
data.frame("Comparison" = i,
"IPD_GLB1_neg" = IPD_neg,
"IPD_GLB1_pos" = IPD_pos,
"HC_GLB1_neg" = HC_neg,
"HC_GLB1_pos" = HC_pos,
"IPD_GLB1_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
"HC_GLB1_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()
GLB1 %>%
mutate(IPD_GLB1_Percentage = round(IPD_GLB1_Percentage, 4),
HC_GLB1_Percentage = round(HC_GLB1_Percentage, 4)) %>%
DT::datatable()
# Boxplot of GLB1
df <- cbind(seurat_anno@meta.data %>%
dplyr::select(cluster_annotation, GLB1, group, sample),
data = seurat_anno@assays$RNA@data["GLB1",])
ggboxplot(df, x = "group", y = "data",
color = "group", palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group", size = 0.2,
title = "GLB1 normalized counts", facet.by = "cluster_annotation")